/*
The user needs to manually change the working directory to the folder in which the source data files are stored. See Read_Me.pdf.

The following pair of commands creates a temporary folder on the D drive where temporary files are stored. The user can subsequently delete this temporary folder manually.
*/
* Root folder for every intermediate and output dataset written by the programs below.
global tmp d:\tmp\environment_dynamic_contests
* Create the folder through the operating-system shell (Windows path); capture keeps the do-file running if the call reports a problem.
capture !mkdir $tmp

* Start from an empty session before raising the limits below.
clear all
* Matrix-size and variable-count limits requested for the session.
set matsize 10000
set maxvar 100000
* Do not pause output with --more-- prompts.
set more off
* Close any log / discard any preserved dataset left over from an earlier run; capture ignores the error when there is none.
capture log close
capture restore



capture program drop environmentAustralia
program define environmentAustralia
    /*
    Builds the hourly Melbourne environment dataset and saves it as AustraliaEnvironment.dta in the temporary folder.

    Inputs (read from the working directory):
      - data_VictoriaEPA_API_2004_2014.xlsx, data_VictoriaEPA_temperature_2004_2014.xlsx,
        data_VictoriaEPA_windSpeed_2004_2014.xlsx, data_VictoriaEPA_24h_PM25_2004_2014.xlsx
      - data_VictoriaEPA_2015_2016.xlsx
      - data_NASA_Melbourne_2004_2016.dta
    Output: one row per date-hour with the mean of the current and the next two hourly values
    (suffix _t_f2) of temperature, relative humidity, wind speed, precipitation, API and PM2.5.

    Takes no arguments. Uses the user-written command renvars.
    Temporary files are written to, and erased from, the temporary folder.
    */

    *Time is Australia Eastern time.
    *The distance between Alphington and Melbourne Park is 8.8 km. The distance between Footscray and Melbourne Park is 8.8 km.
    *The distance between Richmond and Melbourne Park is 2.5 km. Richmond API monitoring starts in December 2006.
    *The distance between RMIT (Royal Melbourne Institute of Technology) and Melbourne Park is 2.8km. RMIT API monitoring ends in October 2006.

    *1-h Airborne Particle Index, 2004-2014, source is Victoria EPA
    *Each sheet is imported, its station column renamed, and merged on the timestamp into one running temporary file
    import excel using "data_VictoriaEPA_API_2004_2014.xlsx", sheet(Alphington) cellrang(A2) firstrow clear
    ren Alphington api_al
    label var api_al "1-h Airborne Particle Index, Alphington"
    save $tmp\tmp_VictoriaEPA_2004_2014, replace
    import excel using "data_VictoriaEPA_API_2004_2014.xlsx", sheet("RMIT 2003-2006") cellrang(A2) firstrow clear
    ren RMIT api_rm
    label var api_rm "1-h Airborne Particle Index, RMIT"
    merge 1:1 start_time_est using $tmp\tmp_VictoriaEPA_2004_2014, nogen
    save $tmp\tmp_VictoriaEPA_2004_2014, replace
    import excel using "data_VictoriaEPA_API_2004_2014.xlsx", sheet("Richmond 2006-2014") cellrang(A2) firstrow clear
    ren Richmond api_ri
    label var api_ri "1-h Airborne Particle Index, Richmond"
    merge 1:1 start_time_est using $tmp\tmp_VictoriaEPA_2004_2014, nogen
    save $tmp\tmp_VictoriaEPA_2004_2014, replace
    import excel using "data_VictoriaEPA_API_2004_2014.xlsx", sheet("Footscray") cellrang(A2) firstrow clear
    ren Footscray api_fo
    label var api_fo "1-h Airborne Particle Index, Footscray"
    merge 1:1 start_time_est using $tmp\tmp_VictoriaEPA_2004_2014, nogen
    save $tmp\tmp_VictoriaEPA_2004_2014, replace

    *1-h dry bulb temperature (Celsius), 2004-2014, source is Victoria EPA
    import excel using "data_VictoriaEPA_temperature_2004_2014.xlsx", sheet(Alphington) cellrang(A2) firstrow clear
    ren Alphington tp_al
    label var tp_al "1-h dry temperature (Celsius), Alphington"
    merge 1:1 start_time_est using $tmp\tmp_VictoriaEPA_2004_2014, nogen
    save $tmp\tmp_VictoriaEPA_2004_2014, replace
    import excel using "data_VictoriaEPA_temperature_2004_2014.xlsx", sheet("FOOTSCRAY") cellrang(A2) firstrow clear
    ren Footscray tp_fo
    label var tp_fo "1-h dry temperature (Celsius), Footscray"
    merge 1:1 start_time_est using $tmp\tmp_VictoriaEPA_2004_2014, nogen
    save $tmp\tmp_VictoriaEPA_2004_2014, replace

    *1-h vector wind speed (m/s), 2004-2014, source is Victoria EPA
    import excel using "data_VictoriaEPA_windSpeed_2004_2014.xlsx", sheet(Alphington) cellrang(A2) firstrow clear
    ren Alphington wsp_al
    label var wsp_al "1-h wind speed (m/s), Alphington"
    merge 1:1 start_time_est using $tmp\tmp_VictoriaEPA_2004_2014, nogen
    save $tmp\tmp_VictoriaEPA_2004_2014, replace
    import excel using "data_VictoriaEPA_windSpeed_2004_2014.xlsx", sheet("Footscray") cellrang(A2) firstrow clear
    ren Footscray wsp_fo
    label var wsp_fo "1-h wind speed (m/s), Footscray"
    merge 1:1 start_time_est using $tmp\tmp_VictoriaEPA_2004_2014, nogen

    *Split the timestamp into a daily date and an hour of day
    gen date = dofc(start_time_est)
    gen hour = hh(start_time_est)			//This is the start time of the 1-hour interval, i.e., 4 pm is the 1-hour mean from 16:00 to 16:59
    gen min  = mm(start_time_est)
    replace hour = hour+1 if min==59		//A timestamp at minute 59 is moved up to the next hour
    *NOTE(review): a timestamp at 23:59 would become hour 24 on the same date (date is not advanced) - confirm none exist in the source sheets
    drop start_time_est min
    save $tmp\tmp_VictoriaEPA_2004_2014, replace

    *24-h PM2.5 every 3 days (ug/m3), 2004-2014, source is Victoria EPA
    import excel using "data_VictoriaEPA_24h_PM25_2004_2014.xlsx", cellrang(A2) firstrow clear
    ren DATE date
    ren Alphington pm25_24h_al
    ren Footscray  pm25_24h_fo
    label var pm25_24h_al "24-h PM2.5 every 3 days (ug/m3), Alphington"
    label var pm25_24h_fo "24-h PM2.5 every 3 days (ug/m3), Footscray"
    foreach var in pm25_24h_al pm25_24h_fo {
        assert `var'>0
        }
    merge 1:m date using $tmp\tmp_VictoriaEPA_2004_2014, nogen

    *Correlation between 24-h PM2.5 and daily mean API (see the end of Section 2 of the article)
    *The daily PM2.5 columns are used only for this check and are dropped afterwards
    preserve
        collapse (mean) pm25_24h_al pm25_24h_fo api_al api_fo, by(date)
        correl pm25_24h_al api_al
        correl pm25_24h_fo api_fo
    restore
    drop pm25_24h_al pm25_24h_fo

    save $tmp\AustraliaEnvironment, replace
    erase $tmp\tmp_VictoriaEPA_2004_2014.dta


    *2015-2016 workbook: for each measure the 2015 and 2016 sheets are imported, variable names are lower-cased and
    *cut to their first 3 characters, the station columns get a measure prefix, and the two years are appended

    *1-h Airborne Particle Index, 2015-2016, source is Victoria EPA
    import excel using "data_VictoriaEPA_2015_2016.xlsx", sheet("API 2015") cellrang(A2) firstrow clear
    renvars *, lower
    renvars *, trim(3)
    renvars alp-ric, prefix(api_)
    save $tmp\tmp, replace
    import excel using "data_VictoriaEPA_2015_2016.xlsx", sheet("API 2016") cellrang(A2) firstrow clear
    renvars *, lower
    renvars *, trim(3)
    renvars alp-ric, prefix(api_)
    append using $tmp\tmp
    save $tmp\tmp_VictoriaEPA_2015_2016, replace

    *1-h dry bulb temperature (Celsius), 2015-2016, source is Victoria EPA
    import excel using "data_VictoriaEPA_2015_2016.xlsx", sheet("DBT 2015") cellrang(A2) firstrow clear
    renvars *, lower
    renvars *, trim(3)
    renvars alp-poi, prefix(tp_)
    save $tmp\tmp, replace
    import excel using "data_VictoriaEPA_2015_2016.xlsx", sheet("DBT 2016") cellrang(A2) firstrow clear
    renvars *, lower
    renvars *, trim(3)
    renvars alp-poi, prefix(tp_)
    append using $tmp\tmp
    foreach x of varlist tp_* {
        local xlab: variable label `x'
        label var `x' "1-h dry temperature (Celsius), `xlab'"		//Closing quote was missing in the original statement
        }
    merge 1:1 dat using $tmp\tmp_VictoriaEPA_2015_2016, nogen
    save $tmp\tmp_VictoriaEPA_2015_2016, replace

    *1-h vector wind speed (m/s), 2015-2016, source is Victoria EPA
    import excel using "data_VictoriaEPA_2015_2016.xlsx", sheet("VWS 2015") cellrang(A2) firstrow clear
    renvars *, lower
    renvars *, trim(3)
    renvars alp-poi, prefix(wsp_)
    save $tmp\tmp, replace
    import excel using "data_VictoriaEPA_2015_2016.xlsx", sheet("VWS 2016") cellrang(A2) firstrow clear
    renvars *, lower
    renvars *, trim(3)
    renvars alp-poi, prefix(wsp_)
    append using $tmp\tmp
    foreach x of varlist wsp_* {
        local xlab: variable label `x'
        label var `x' "1-h wind speed (m/s), `xlab'"
        }
    merge 1:1 dat using $tmp\tmp_VictoriaEPA_2015_2016, nogen
    save $tmp\tmp_VictoriaEPA_2015_2016, replace

    *1-h PM2.5 (ug/m3), 2015-2016, source is Victoria EPA
    import excel using "data_VictoriaEPA_2015_2016.xlsx", sheet("BPM2.5 2015") cellrang(A2) firstrow clear
    renvars *, lower
    renvars *, trim(3)
    renvars alp, prefix(pm25_1h_)		//Renamed variable includes _1h_ to distinguish from _24h_ in the 2004-2014 data
    save $tmp\tmp, replace
    import excel using "data_VictoriaEPA_2015_2016.xlsx", sheet("BPM2.5 2016") cellrang(A2) firstrow clear
    renvars *, lower
    renvars *, trim(3)
    renvars alp foo, prefix(pm25_1h_)
    append using $tmp\tmp
    foreach x of varlist pm25_* {
        local xlab: variable label `x'
        label var `x' "1-h PM2.5 (ug/m3), `xlab'"
        }
    *Negative readings are set to zero. At this point the variables still carry the 3-letter station suffix
    *(they are shortened to _al / _fo further below), so the full names are spelled out here instead of
    *relying on variable-name abbreviation
    foreach var in pm25_1h_alp pm25_1h_foo {
        tab `var' if `var'<=0
        replace `var' = 0 if `var'<0
        }
    merge 1:1 dat using $tmp\tmp_VictoriaEPA_2015_2016, nogen		//PM2.5 is missing for 61 1-h observations
    erase $tmp\tmp.dta

    *The 2015-2016 timestamp is a string: first word is the date, second word starts with the 2-digit hour
    ren dat start_time_est
    split start_time_est
    gen date = date(start_time_est1, "DM20Y")
    gen hour = real(substr(start_time_est2,1,2))
    drop start_time_est start_time_est1 start_time_est2
    renvars *_foo *_ric *_alp, postdrop(1)		//Shorten to the 2-letter station suffixes used in the 2004-2014 data
    foreach x of varlist api_* {
        local xlab: variable label `x'
        label var `x' "1-h Airborne Particle Index, `xlab'"
        }
    save $tmp\tmp_VictoriaEPA_2015_2016, replace


    use $tmp\AustraliaEnvironment, clear
    merge 1:1 date hour using $tmp/tmp_VictoriaEPA_2015_2016, update replace nogen		//Merge in 2015-2016 data sent subsequently by Victoria EPA
    erase $tmp\tmp_VictoriaEPA_2015_2016.dta

    *Average across Alphington & Footscray (~9 km away) with temporal coverage throughout
    *For API only, Richmond and then RMIT fill in hours where both Alphington and Footscray are missing
    foreach var in api pm25_1h tp wsp {
        egen `var'=rowmean(`var'_al `var'_fo)
        drop `var'_al `var'_fo
        if "`var'"=="api" replace `var'=`var'_ri if `var'==.
        if "`var'"=="api" replace `var'=`var'_rm if `var'==.
        if "`var'"=="api" drop `var'_ri `var'_rm
        }
    drop *_box *_bri *_bro *_dan *_gee *_moo *_poi *_dee *_mel *_alt		//Stations not used

    save $tmp\AustraliaEnvironment, replace


    *3-h weather data, source is NASA
    *Hour is local time. A 2 am reading refers to 23:00 to 01:59, a 5 am reading refers to 02:00 to 04:59 and so on.
    use data_NASA_Melbourne_2004_2016, clear
    collapse qair tair press wspd precipitation, by(date hour)		//In January 2016, there are two observations within date-hour, so take the mean
    renvars qair tair press wspd precipitation \ hm tp pr wsp pp
    replace hm = 0.263*pr*hm/exp(17.67*(tp-273.16)/(tp-29.65))			//http://earthscience.stackexchange.com/questions/2360/how-do-i-convert-specific-humidity-to-relative-humidity
    gen year = year(date)
    replace pp = pp*3600 if inlist(year,2004,2005,2006,2007,2016)		//Unit for pp is kg/m2/s in 2004-2007 and 2016, and mm/h in the other years. y mm/h means y kg/h on a 1 m2 surface, so 1 mm/h corresponds to 1 kg/m2/h or 1/3600 kg/m2/s
    drop tp pr wsp							//For Melbourne, we use temperature and wind speed data from Victoria EPA (1-hour means), not NASA (3-h means); see Table 1 caption

    *Turn each 3-h reading into three hourly rows: a reading at hour h>2 becomes hours h-2, h-1 and h;
    *a reading at hour 2 becomes hours 0, 1 and 2
    expand 3
    assert hour<24
    bysort date hour: replace hour = hour[3]-(3-_n) if hour>2
    bysort date hour: replace hour = 0 if hour==2 & _n==1
    bysort date hour: replace hour = 1 if hour==2 & _n==1		//bysort regroups after the previous line, so _n==1 is now the first of the two rows still at hour 2


    merge 1:1 date hour using $tmp\AustraliaEnvironment, nogen		//Combine pollution and weather
    gen month = month(date)
    replace year = year(date) if year==.
    order date hour year month


    *Define time series, by hour
    gen date_text = string(date,"%td")
    gen hd = string(hour)+" "+date_text
    gen double dt = clock(hd,"h DMY")
    tsset dt, delta(3600000)			//One period is 3,600,000 ms, i.e., one hour
    *For each measure, replace the hourly value by the mean of hours t, t+1 and t+2 (rowmean ignores missing values)
    gen t1 = .
    gen t2 = .
    foreach x of varlist tp hm wsp pp api pm25_1h {
        replace t1 = F.`x'
        replace t2 = F2.`x'
        egen `x'_t_f2 = rowmean(`x' t1 t2)
        drop `x'
        }
    drop date_text hd dt t1 t2
    ren pm25_1h_t_f2 pm25_t_f2

    label var tp_t_f2   "Temperature (Celsius)"
    label var hm_t_f2   "Relative humidity (%)"
    label var wsp_t_f2  "Wind speed (m/s)"
    label var pp_t_f2   "Precipitation (mm/h)"
    label var api_t_f2  "Airborne Particle Index"
    label var pm25_t_f2 "PM2.5 (ug/m3)"

    save $tmp\AustraliaEnvironment, replace
end

environmentAustralia



capture program drop environmentChina
program define environmentChina
    /*
    Builds the hourly environment dataset for the Chinese cities (Beijing, Guangzhou, Hong Kong, Shenzhen,
    Tianjin, Wuhan) and saves it as ChinaEnvironment.dta in the temporary folder.

    Inputs (read from the working directory):
      - data_USDOS_PM25_Beijing_Guangzhou_2008_2016.dta
      - data_HKEPD_PM25_HongKong_2008_2015.csv
      - data_CMEP_PM25_6cities_2013_2016.dta, data_CMEP_stationLocation_2015.dta
      - data_NASA_<city>_2008_2016.dta for the six cities
    Output: one row per city-date-hour with the mean of the current and the next two hourly values
    (suffix _t_f2) of temperature, relative humidity, wind speed, precipitation and PM2.5.

    Takes no arguments. Uses the user-written commands renvars and vincenty.
    */

    *Local times

    *1-h PM2.5, Beijing and Guangzhou, 2008-2016, source is US Department of State
    *A 2 pm reading refers to 13:00 to 13:59
    use data_USDOS_PM25_Beijing_Guangzhou_2008_2016, clear
    ren site city
    replace city=lower(trim(city))
    assert city=="beijing"|city =="guangzhou"
    replace value=. if value<0						//-999 accounts for 99% of negative values
    ren value pm25us
    bysort city year month day hour: keep if _n==1	//A few repeated observations, outside the WTA series months
    keep city year month day hour pm25us
    save $tmp\tmp_USDOS_pm25_beijing_guangzhou, replace

    *1-h PM2.5, Hong Kong, 2008-2015 (2016 is missing in this file, but available in file below), source is Hong Kong Environmental Protection Department
    *A 2 pm reading refers to 13:00 to 13:59
    import delimit using "data_HKEPD_PM25_HongKong_2008_2015.csv", clear rowrang(12) varnames(12)
    assert station=="CAUSEWAY BAY"
    drop station co no2-so2
    destring fsp, replace force			//Non-numeric entries become missing
    ren fsp pm25
    gen city = "hongkong"
    gen tmp = date(date,"DMY")
    drop date
    ren tmp date
    replace date = date+1 if hour==24				//Denote hour 24 by hour 0 the following day, to make consistent with other datasets
    replace hour = 0 if hour==24
    gen year  = year(date)
    gen month = month(date)
    gen day   = day(date)
    drop date
    assert year~=2016
    save $tmp\tmp_HKEPD_pm25_hongkong, replace
    *Hong Kong 2016 comes from the Chinese Ministry file and is stacked on top of the 2008-2015 series
    use data_CMEP_PM25_6cities_2013_2016, clear		//clear is the documented option of use (the original passed replace)
    replace city = "hongkong" if city=="Hong Kong"
    keep if city=="hongkong" & jczname=="causeway bay"		//Hong Kong Environmental Protection Department station is CAUSEWAY BAY
    drop jczname
    keep if year==2016
    append using $tmp\tmp_HKEPD_pm25_hongkong
    save $tmp\tmp_HKEPD_pm25_hongkong, replace

    *1-h PM2.5, several cities, 2013-2016, source is Chinese Ministry of Environmental Protection
    *A 2 pm reading refers to 13:00 to 13:59
    use data_CMEP_stationLocation_2015, clear
    replace cityEng = lower(cityEng)
    keep if cityEng == "shenzhen" | cityEng == "guangzhou" | cityEng == "beijing" | cityEng == "tianjin" | cityEng=="wuhan"
    drop Longitude Latitude StationCode
    preserve
        use data_CMEP_PM25_6cities_2013_2016, clear		//clear is the documented option of use (the original passed replace)
        ren city cityEng
        replace cityEng = lower(cityEng)
        ren jczname monitor
        save $tmp\tmp_CMEP_pm25_6cities, replace
    restore
    merge 1:m cityEng monitor using $tmp\tmp_CMEP_pm25_6cities, keep(3) nogen
    replace pm25=. if pm25<0
    bysort cityEng year month day hour: egen pm25_mean = mean(pm25)				//1-h mean across all stations within the city
    save $tmp\tmp_CMEP_pm25_6cities, replace
    use data_CMEP_stationLocation_2015, clear
    replace cityEng = lower(cityEng)
    keep if cityEng == "shenzhen" | cityEng == "guangzhou" | cityEng == "beijing" | cityEng == "tianjin" | cityEng=="wuhan"
    *Find air monitors near match venues
    *Each call stores the distance in km from every station to one venue, in a variable named after that venue's city
    vincenty Latitude Longitude 22.6830006 114.2018248, hav(shenzhen)  replace inkm
    vincenty Latitude Longitude 23.1394423 113.3199878, hav(guangzhou) replace inkm
    vincenty Latitude Longitude 40.019854  116.3732643, hav(beijing)   replace inkm
    vincenty Latitude Longitude 38.969986  117.0767036, hav(tianjin)   replace inkm
    vincenty Latitude Longitude 30.473851  114.45,      hav(wuhan)     replace inkm
    *Rank the stations of each city by distance to that city's venue and keep the 10 nearest
    bysort city (shenzhen) : gen     distanceOrder = _n if cityEng=="shenzhen"
    bysort city (guangzhou): replace distanceOrder = _n if cityEng=="guangzhou"
    bysort city (beijing)  : replace distanceOrder = _n if cityEng=="beijing"
    bysort city (tianjin)  : replace distanceOrder = _n if cityEng=="tianjin"
    bysort city (wuhan)    : replace distanceOrder = _n if cityEng=="wuhan"
    keep if distanceOrder<=10
    drop shenzhen guangzhou beijing tianjin wuhan Longitude Latitude StationCode
    merge 1:m city monitor using $tmp\tmp_CMEP_pm25_6cities, keep(3) nogen
    erase $tmp\tmp_CMEP_pm25_6cities.dta
    drop city monitor
    ren cityEng city
    encode city, gen(cityID)
    egen long cityDateHour = group(cityID year month day hour)
    bysort cityDateHour distanceOrder: keep if _n==1
    reshape wide pm25, i(cityDateHour) j(distanceOrder)		//One pm25 column per distance rank
    /*Use the observation for the nearest air monitor (within the same city), if it is not missing; otherwise,
    use the second nearest air monitor. If still missing, use the mean across all air monitors (within the same city)*/
    replace pm251 = pm252     if pm251==.
    replace pm251 = pm25_mean if pm251==.
    ren pm251 pm25
    drop pm25? pm2510 pm25_mean cityID cityDateHour

    append using $tmp\tmp_HKEPD_pm25_hongkong
    merge 1:1 city year month day hour using $tmp\tmp_USDOS_pm25_beijing_guangzhou, nogen
    erase $tmp\tmp_HKEPD_pm25_hongkong.dta
    erase $tmp\tmp_USDOS_pm25_beijing_guangzhou.dta

    gen date = mdy(month, day, year)

    save $tmp\ChinaEnvironment, replace

    *US Department of State PM2.5 data are missing for much of 2011 in Guangzhou and where missing we use Hong Kong Environmental Protection Department PM2.5 instead (see Table 1 caption)
    preserve
        use $tmp\ChinaEnvironment, clear
        keep if city=="hongkong"&year==2011
        replace city="guangzhou"
        keep city date hour pm25
        ren pm25 pm25us
        save $tmp\tmp_HKEPD_hongkong_2011, replace
    restore
    merge 1:1 city date hour using $tmp\tmp_HKEPD_hongkong_2011, update nogen		//update fills only missing master values
    erase $tmp\tmp_HKEPD_hongkong_2011.dta
    *Chinese Ministry of Environmental Protection PM2.5 data are missing in Shenzhen for matches between Dec 29, 2012 and Jan 5, 2013 and we use Hong Kong Environmental Protection Department PM2.5 instead
    preserve
        use $tmp\ChinaEnvironment, clear
        keep if city=="hongkong"&((year==2012&month==12&day>=29)|(year==2013&month==1&day<=5))
        replace city="shenzhen"
        assert date<=mdy(1,5,2013) if city=="shenzhen"
        save $tmp\tmp_CMEP_shenzhen_2013series, replace
    restore
    assert date>=mdy(1,18,2013) if city=="shenzhen"			//master data observations for Shenzhen starts only on Jan 18, 2013, so need to append
    append using $tmp\tmp_CMEP_shenzhen_2013series
    erase $tmp\tmp_CMEP_shenzhen_2013series.dta

    *For Beijing and Guangzhou we use US Department of State PM2.5 data (see Table 1 caption)
    replace pm25=pm25us if city=="beijing"|city=="guangzhou"
    drop pm25us

    save $tmp\ChinaEnvironment, replace


    *3-h weather data, source is NASA
    *Hour is local time. A 2 am reading refers to 23:00 to 01:59, a 5 am reading refers to 02:00 to 04:59 and so on.
    use          data_NASA_Beijing_2008_2016, clear
    append using data_NASA_Guangzhou_2008_2016
    append using data_NASA_HongKong_2008_2016
    append using data_NASA_Shenzhen_2008_2016
    append using data_NASA_Tianjin_2008_2016
    append using data_NASA_Wuhan_2008_2016
    isid city date hour
    renvars qair tair press wspd precipitation \ hm tp pr wsp pp
    replace hm = 0.263*pr*hm/exp(17.67*(tp-273.16)/(tp-29.65))			//http://earthscience.stackexchange.com/questions/2360/how-do-i-convert-specific-humidity-to-relative-humidity
    replace tp=tp-273.15			//Unlike Melbourne where we use Victoria EPA temperature and wind speed, for China we use NASA data for all weather variables; Kelvin converted to Celsius
    drop pr

    *Turn each 3-h reading into three hourly rows: a reading at hour h>2 becomes hours h-2, h-1 and h;
    *a reading at hour 2 becomes hours 0, 1 and 2
    expand 3
    assert hour<24
    bysort city date hour: replace hour = hour[3]-(3-_n) if hour>2
    bysort city date hour: replace hour = 0  if hour==2 & _n==1
    bysort city date hour: replace hour = 1  if hour==2 & _n==2


    gen year  = year(date)
    gen month = month(date)
    gen day   = day(date)
    merge 1:1 city date hour using $tmp\ChinaEnvironment, keep(1 3) nogen		//Combine pollution and weather
    order city date hour year month day


    *Define panel data, by hour within city
    gen date_text = string(date,"%td")
    gen hd = string(hour)+" "+date_text
    gen double dt = clock(hd,"h DMY")
    encode city, gen(cityID)
    tsset cityID dt, delta(3600000)			//One period is 3,600,000 ms, i.e., one hour
    *For each measure, replace the hourly value by the mean of hours t, t+1 and t+2 (rowmean ignores missing values)
    gen t1 = .
    gen t2 = .
    foreach x of varlist tp hm wsp pp pm25 {
        bysort cityID: replace t1 = F.`x'
        bysort cityID: replace t2 = F2.`x'
        egen `x'_t_f2   = rowmean(`x' t1 t2)
        drop `x'
        }
    drop date_text hd dt t1 t2 cityID

    label var tp_t_f2   "Temperature (Celsius)"
    label var hm_t_f2   "Relative humidity (%)"
    label var wsp_t_f2  "Wind speed (m/s)"
    label var pp_t_f2   "Precipitation (mm/h)"
    label var pm25_t_f2 "PM2.5 (ug/m3)"			//Same wording as the label used in program environmentAustralia

    save $tmp/ChinaEnvironment, replace
end

environmentChina



capture program drop playerRanking		//This auxiliary program reads a source data file and produces a temporary dataset that is called on by programs contestAustralia and contestChina below
program define playerRanking
    /*
    Reads data_WTA_ranking_2004_2016.csv and saves tmp_ranking.dta in the temporary folder:
    one row per player (fullname) and calendar month-year, holding the rank and ranking points
    besides time-invariant characteristics such as date of birth. Takes no arguments.
    */
    import delimit using "data_WTA_ranking_2004_2016.csv", clear stringcols(1)

    *Exact repeats: there are 400 repeated observations
    bysort rank_date rank_previous rank lastname firstname country ioc dob pts: keep if _n==1

    *Two sisters share the same name, Ioana Ivan. Their ranks are very close (~900-1100). We pick the better rank.
    bysort rank_date lastname firstname: gen obs = _N
    list if obs>1
    bysort rank_date lastname firstname (rank): keep if _n==1
    drop obs

    *Standardise capitalisation and spacing, then build the merge key
    foreach namePart in lastname firstname {
        replace `namePart' = trim(proper(`namePart'))
        }
    gen fullname = firstname+" "+lastname

    *Calendar month and year of the ranking date
    tempvar rankDay
    gen `rankDay' = date(rank_date, "DMY")
    gen month = month(`rankDay')
    tab month												//Ranking in the months of January and September
    gen year  = year(`rankDay')
    drop `rankDay'
    isid fullname rank_date

    *Use the better rank (and associated pts) if there is more than one observation within a calendar month;
    *if pts differ within month-year-fullname-rank, take pts associated with the earliest observation within that tuple
    bysort month year fullname (rank rank_date): keep if _n==1

    *Hyphens in names become spaces (up to 3 per name), as in program cleanPlayerName
    replace fullname = subinstr(fullname,"-"," ",3)

    drop rank_date rank_previous country lastname firstname
    save $tmp\tmp_ranking, replace
end

playerRanking			//Run this auxiliary program to produce a temporary dataset that is called on by programs contestAustralia and contestChina below
	
	
	
capture program drop swapPlayerName		//Private helper of cleanPlayerName: rewrites one spelling of fullname into another
program define swapPlayerName
    args fromName toName
    replace fullname = "`toName'" if fullname=="`fromName'"
end

capture program drop cleanPlayerName		//This auxiliary program is called on by programs contestAustralia and contestChina
program define cleanPlayerName
    /*
    Harmonises the string variable fullname in the dataset in memory so that it matches the spelling
    used in the ranking data. Requires variables fullname and year (year is used only to tell apart
    the two players named Ting Li). Takes no arguments.
    Each helper call below reads: spelling found in the data, then spelling to use instead.
    */

    *Hyphens become spaces (up to 3 per name)
    replace fullname = subinstr(fullname,"-"," ",3)

    swapPlayerName "Juliana Fedak"                      "Yuliana Fedak"
    swapPlayerName "Karina Witthoeft"                   "Carina Witthoeft"
    swapPlayerName "Veronica Cepede"                    "Veronica Cepede Royg"
    swapPlayerName "Sophia Shapatava"                   "Sofia Shapatava"
    swapPlayerName "Evie Jeflea"                        "Daniella Jeflea"
    swapPlayerName "Valeria Solovyeva"                  "Valeriya Solovyeva"
    swapPlayerName "Valeria Solovieva"                  "Valeriya Solovyeva"
    swapPlayerName "Maria Martinez Sanchez"             "Maria Jose Martinez Sanchez"
    *NOTE(review): the next rule maps a spelling onto itself, so it changes nothing - presumably a variant spelling was intended; confirm
    swapPlayerName "Viktorija Rajicic"                  "Viktorija Rajicic"
    swapPlayerName "Kimiko Date Krumm"                  "Kimiko Date"

    *Two different players are named Ting Li; they are told apart by year
    replace fullname = "Ting (Jr) Li" if fullname=="Ting Li" & year>2007
    replace fullname = "Ting (Sr) Li" if fullname=="Ting Li" & year<=2007

    swapPlayerName "Li Na"                              "Na Li"
    swapPlayerName "Yi Fan Xu"                          "Yifan Xu"
    swapPlayerName "Chen Tang"                          "Hao Chen Tang"
    swapPlayerName "Alexandra Krunic"                   "Aleksandra Krunic"
    swapPlayerName "Anastasia Yakimova"                 "Anastasiya Yakimova"
    swapPlayerName "Chi Chi Scholl"                     "Chiara Scholl"
    swapPlayerName "Cristina Andreea Mitu"              "Andreea Mitu"
    swapPlayerName "Daniela Bedanova"                   "Daja Bedanova"
    swapPlayerName "Edina Gallovits"                    "Edina Gallovits Hall"
    swapPlayerName "Eugenia Linetskaya"                 "Evgenia Linetskaya"
    swapPlayerName "Kathrin Woerle"                     "Kathrin Woerle Scheller"
    swapPlayerName "Lesya Tsurenko"                     "Lesia Tsurenko"
    swapPlayerName "Marie Gaiane Mikaelian"             "Marie Gayanay Mikaelian"
    swapPlayerName "Marissa Irvin"                      "Marissa Gould"
    swapPlayerName "Michelle Larcher de Brito"          "Michelle Larcher De Brito"
    swapPlayerName "Nan Nan Liu"                        "Nannan Liu"
    swapPlayerName "Nastassya Burnett"                  "Nastassja Burnett"
    swapPlayerName "Olga Blahotova"                     "Olga Vymetalkova"
    swapPlayerName "Raluca Ioana Olaru"                 "Raluca Olaru"
    swapPlayerName "Shayna Mc Dowell"                   "Shayna McDowell"
    swapPlayerName "Tian Tian Sun"                      "Tiantian Sun"
    swapPlayerName "Yulia Beygelzimer"                  "Yuliya Beygelzimer"
    swapPlayerName "Ekaterina Ivanova"                  "Ekaterina Lopes"
    swapPlayerName "Katerina Bondarenko"                "Kateryna Bondarenko"
    swapPlayerName "Viktoria Azarenka"                  "Victoria Azarenka"
    swapPlayerName "Stephanie Foretz Gacon"             "Stephanie Foretz"
    swapPlayerName "Viktoria Kutuzova"                  "Viktoriya Kutuzova"
    swapPlayerName "Yan Chong Chen"                     "Yanchong Chen"
    swapPlayerName "Vasilissa Bardina"                  "Vasilisa Bardina"
    swapPlayerName "Garbine Muguruza Blanco"            "Garbine Muguruza"
    swapPlayerName "Irina Begu"                         "Irina Camelia Begu"
    swapPlayerName "Lara Arruabarrena Vecino"           "Lara Arruabarrena"
    swapPlayerName "Mirjana Lucic"                      "Mirjana Lucic Baroni"
    swapPlayerName "Olga Poutchkova"                    "Olga Puchkova"
    swapPlayerName "Patricia Mayr"                      "Patricia Mayr Achleitner"
    swapPlayerName "Sloane A. Stephens"                 "Sloane Stephens"
    swapPlayerName "Sorana Mihaela Cirstea"             "Sorana Cirstea"
    swapPlayerName "Stefanie Vogele"                    "Stefanie Voegele"
    swapPlayerName "Tzipi Obziler"                      "Tzipora Obziler"
    swapPlayerName "Xin Yun Han"                        "Xinyun Han"
    swapPlayerName "Anne Jr. Schaefer"                  "Anne Schaefer"
    swapPlayerName "Bettina Jozami"                     "Betina Jozami"
    swapPlayerName "Darya Kasatkina"                    "Daria Kasatkina"
    swapPlayerName "Madalina Victorita Gojnea"          "Madalina Gojnea"
    swapPlayerName "Margalita Chakhnashvili"            "Margalita Chakhnashvili Ranzinger"
    swapPlayerName "Rosana De Los Rios"                 "Rossana De Los Rios"
    swapPlayerName "Yevgenia Savransky"                 "Yevgenia Savranska"
    swapPlayerName "Katerina Klapkova"                  "Katerina Bohmova"
    swapPlayerName "Daria Kustova"                      "Darya Kustova"
    swapPlayerName "Nadiya Kichenok"                    "Nadiia Kichenok"
    swapPlayerName "Eloisa Maria Compostizo De Andres"  "Eloisa Compostizo De Andres"
    swapPlayerName "Iirini Georgatou"                   "Eirini Georgatou"
    swapPlayerName "Joanna Sakowicz"                    "Joanna Sakowicz Kostecka"
    swapPlayerName "Liana Gabriela Ungur"               "Liana Ungur"
    swapPlayerName "Alexandrina Naydenova"              "Aleksandrina Naydenova"
    swapPlayerName "Jia Qi Kang"                        "Jiaqi Kang"
    swapPlayerName "Man Ying Maggie Ng"                 "Man Ying Ng"
    swapPlayerName "Nan Nan Zhang"                      "Nannan Zhang"
    swapPlayerName "Shao Zhuo Liu"                      "Shaozhuo Liu"
    swapPlayerName "Venise Wing Yau Chan"               "Venise Chan"
end



capture program drop contestAustralia
program define contestAustralia
	import delimit using "data_flashscore_Australia_2004_2016.csv", clear stringcols(35)
	drop v23
	foreach x of varlist v* {
		disp "`x'"
		replace `x' = trim(`x')
        assert `x'==""
		}
	drop v*
	gen double datehour_SG = clock(datehour, "DMY hm")
	format datehour_SG %tc
	gen year = year(dofc(datehour_SG))
	gen month = month(dofc(datehour_SG))
	
	gen qualify = regexm(city, "Qualification")			//Qualifying matches, rounds, venues
	bysort year: tab round if qualify==1
	replace round = subinstr(round,"hard - ","",1)
	replace round = subinstr(round,"hard","",1)
	replace round = trim(round)
	replace round = "Q-1R" if qualify==1 & (round=="Quarter-finals" | round=="1/64-finals")
	replace round = "Q-2R" if qualify==1 & (round=="Semi-finals"    | round=="1/32-finals")
	replace round = "Q-3R" if qualify==1 & (round=="Final"          | round=="1/16-finals")
	*No information on qualifying rounds in 2009, thus assigned by competition time
	bysort year qualify (datehour_SG): replace round = "Q-1R" if qualify==1 & year==2009 & _n<=48
	bysort year qualify (datehour_SG): replace round = "Q-2R" if qualify==1 & year==2009 & _n>48 & _n<=72
	bysort year qualify (datehour_SG): replace round = "Q-3R" if qualify==1 & year==2009 & _n>72
	replace round = "Q-3R" if round=="" & matchid=="WIEYSjuG"	//Missing round for one match; entered manually
	replace city = "melbourne"
	
	split title, p("|")
	replace title2 = subinstr(title2," - ",",",1)
	split title2, p(",")
	renvars title21 title22 / fullname1 fullname2
	drop title*
	replace fullname1 = trim(proper(fullname1))
	replace fullname2 = trim(proper(fullname2))
	
	ren fullname1 fullname
	cleanPlayerName
	merge m:1 fullname year month using $tmp/tmp_ranking, keep(1 3) nogen	//Merge in time-varying WTA rank and ranking points based on the player 1's full name
	foreach x in fullname rank pts dob ioc {
		ren `x' `x'1
		}
	ren fullname2 fullname
	cleanPlayerName
	merge m:1 fullname year month using $tmp/tmp_ranking, keep(1 3) nogen	//Merge in time-varying WTA rank and ranking points based on the player 2's full name
	foreach x in fullname rank pts dob ioc {
		ren `x' `x'2
		}

    replace dob1 = "13 Jan 1988" if dob1==""&fullname1=="Georgie Stoop"
	replace dob1 = "01 Jun 1982" if dob1==""&fullname1=="Justine Henin"
	replace dob1 = "12 Aug 1982" if dob1==""&fullname1=="Maria Jose Martinez Sanchez"
	replace dob1 = "10 May 1998" if dob1==""&fullname1=="Priscilla Hon"
	replace dob1 = "24 Oct 1972" if dob1==""&fullname1=="Ruxandra Dragomir Ilie"
	replace dob1 = "07 Apr 1994" if dob1==""&fullname1=="Viktorija Rajicic"
	replace dob2 = "13 Jan 1988" if dob2==""&fullname2=="Georgie Stoop"
	replace dob2 = "01 Jun 1982" if dob2==""&fullname2=="Justine Henin"
	replace dob2 = "6 Jun 1991" if dob2==""&fullname2=="Bianca Botto Arias"
	replace dob2 = "22 Mar 1990" if dob2==""&fullname2=="Leticia Costas Moreira"
	replace dob2 = "05 Feb 1998" if dob2==""&fullname2=="Sara Tomic"
	replace dob2 = "14 Jan 1978" if dob2==""&fullname2=="Silvija Talaja"
	replace dob2 = "13 Aug 1976" if dob2==""&fullname2=="Tatiana Panova"
	replace dob2 = "07 Sep 1984" if dob2==""&fullname2=="Vera Zvonareva"
	bysort fullname1: assert dob1==dob1[_n-1] if _n>1
	bysort fullname2: assert dob2==dob2[_n-1] if _n>1

	merge m:1 month year using data_WTA_ranking_mean_SD_2004_2016, keep(1 3) nogen
    gen pts_normalized1 = (pts1-meanPts)/sdPts		//To account for inflation in WTA ranking points circa 2008/9 (see Figure 2 caption)
    gen pts_normalized2 = (pts2-meanPts)/sdPts
	drop meanPts sdPts
	gen dobn1 = date(dob1,"DMY")
    gen dobn2 = date(dob2,"DMY")
	format dobn1 dobn2 %d
    drop dob?
    renvars dobn1 dobn2 / dob1 dob2
    
    split datehour
    split datehour2, p(":")
    destring datehour21 datehour22, gen(hour minutes)
    replace hour = hour+3				//Singapore (where data was downloaded) to Melbourne time
 	gen day  = real(substr(datehour,1,2))
 	gen date = mdy(month, day, year)
    format date %d
    drop datehour*
    
    destring bet1 bet2, replace force
    ren bet1 bet1_temp
	ren bet2 bet2_temp
	merge 1:1 matchid using data_bettingOdds_Australia_2008, assert(1 3) nogen		//Betting odds for 2008 were missing in original flashscore.com data so obtain from tennis-data.co.uk
	assert (bet1~=.&bet2~=.) if year==2008
	assert (bet1==.&bet2==.) if year~=2008
	replace bet1 = bet1_temp if year~=2008
	replace bet2 = bet2_temp if year~=2008
	drop bet1_temp bet2_temp
	tab year if bet1==.|bet2==.
	tab year if bet1~=. & qualify==0
	tab year if bet2~=. & qualify==0
	
	gen set1game1 = real(substr(set1won1,1,1))
    gen len = length(set1won1)-1
    gen set1tiebreak1 = real(substr(set1won1,2,len))
    gen set2game1 = real(substr(set2won1,1,1))
    replace len = length(set2won1)-1
    gen set2tiebreak1 = real(substr(set2won1,2,len))
    gen set1game2 = real(substr(set1won2,1,1))
	replace len = length(set1won2)-1
    gen set1tiebreak2 = real(substr(set1won2,2,len))
    gen set2game2 = real(substr(set2won2,1,1))
    replace len = length(set2won2)-1
    gen set2tiebreak2 = real(substr(set2won2,2,len))
    renvars set3won1 set3won2 / set3game1 set3game2
    destring set3game1 set3game2, replace
    drop set?won? len
	
	drop name? matchduration set?duration link?
	
	order matchid city round qualify date year month day hour minutes matchstatus score1 score2 ///
		set1game1 set1tiebreak1 set1game2 set1tiebreak2 set2game1 set2tiebreak1 set2game2 set2tiebreak2 set3game1 set3game2 ///
		fullname1 fullname2 ioc1 ioc2 dob1 dob2 rank1 rank2 pts1 pts2 pts_normalized1 pts_normalized2 bet1 bet2
	sort city date year month day hour minutes
	
	save $tmp\AustraliaContest, replace
end

*Execute contestAustralia (presumably the definition that ends just above, whose last step saves AustraliaContest in the temporary folder)
contestAustralia



capture program drop contestChina
program define contestChina
	/*
	Assembles the match-level contest file for the China tournaments and saves it as ChinaContest in the temporary folder (global tmp).
	Inputs read here: data_flashscore_China_2008_2016.csv, tmp_ranking (temporary folder), data_WTA_ranking_mean_SD_2004_2016, data_bettingOdds_China_2008.
	Relies on cleanPlayerName, which is defined elsewhere in this file.
	*/
	import delimited using "data_flashscore_China_2008_2016.csv", clear stringcols(35)

	*Column v23 is discarded outright; every remaining unnamed column must be entirely missing before it is discarded
	drop v23
	foreach emptyCol of varlist v* {
		display "`emptyCol'"
		assert `emptyCol'==.
	}
	drop v*

	*Match start as a Stata datetime, from which the calendar year is taken
	generate double datehour_SG = clock(datehour, "DMY hm")
	format datehour_SG %tc
	generate year = year(dofc(datehour_SG))

	*Qualifying matches, rounds, venues
	generate qualify = regexm(city, "Qualification")
	bysort year: tabulate round if qualify==1
	replace round = trim(subinstr(subinstr(round,"hard - ","",1),"hard","",1))
	replace round = "Q-1R" if qualify==1 & inlist(round,"Quarter-finals","1/64-finals")
	replace round = "Q-2R" if qualify==1 & inlist(round,"Semi-finals","1/32-finals")
	replace round = "Q-3R" if qualify==1 & inlist(round,"Final","1/16-finals")
	replace city = lower(word(city,1))
	replace city = "hongkong" if city=="hong"

	*matchid 4SAXYL9k shows as Finished but only 2 games were played in set 2
	replace matchstatus = "Finished / retired" if matchid=="4SAXYL9k"

	*PM2.5 data for Guangzhou in 2009 and 2010 are not available (neither from US DOS nor from HK EPD), so these matches leave the sample
	drop if city=="guangzhou" & year<2011

	*Player names: the text after the first "|" in title holds the two names separated by " - "
	split title, parse("|")
	replace title2 = subinstr(title2," - ",",",1)
	split title2, parse(",")
	rename (title21 title22) (fullname1 fullname2)
	drop title*
	forvalues p = 1/2 {
		replace fullname`p' = trim(proper(fullname`p'))
	}

	*Ranking month: All WTA series but Shenzhen were played in the summer/fall
	generate month = 9
	replace month = 1 if city=="shenzhen"

	*Merge in time-varying WTA rank and ranking points, first on player 1's full name and then on player 2's
	forvalues p = 1/2 {
		rename fullname`p' fullname
		cleanPlayerName
		merge m:1 fullname year month using $tmp\tmp_ranking, keep(1 3) nogen
		foreach v in fullname rank pts dob ioc {
			rename `v' `v'`p'
		}
	}

	*Dates of birth still empty after the ranking merge are filled in by hand
	replace dob1 = "14 Jan 1978" if dob1=="" & fullname1=="Silvija Talaja"
	replace dob1 = "10 Apr 1987" if dob1=="" & fullname1=="Yanchong Chen"
	replace dob1 = "2 Apr 1993"  if dob1=="" & fullname1=="Yang Pang"
	replace dob1 = "24 Sep 1984" if dob1=="" & fullname1=="Klaudia Jans Ignacik"
	replace dob1 = "13 Mar 1986" if dob1=="" & fullname1=="Andreja Klepac"
	replace dob1 = "15 Jul 1997" if dob1=="" & fullname1=="Jil Belen Teichmann"
	replace dob1 = "9 Jan 1997"  if dob1=="" & fullname1=="Yijia Shao"
	replace dob1 = "22 Apr 1996" if dob1=="" & fullname1=="Eudice Wong Chong"
	replace dob1 = "17 Dec 1984" if dob1=="" & fullname1=="Naoko Eto"
	replace dob1 = "19 Jul 1997" if dob1=="" & fullname1=="Jiaqi Kang"
	replace dob1 = "10 Jan 1985" if dob1=="" & fullname1=="Chia Jung Chuang"
	replace dob1 = "1 Jan 1995"  if dob1=="" & fullname1=="Yuanyi Yu"
	replace dob2 = "2 Apr 1993"  if dob2=="" & fullname2=="Yang Pang"
	replace dob2 = "13 Mar 1986" if dob2=="" & fullname2=="Andreja Klepac"
	replace dob2 = "22 Apr 1996" if dob2=="" & fullname2=="Eudice Wong Chong"
	replace dob2 = "19 Feb 1997" if dob2=="" & fullname2=="Kwan Yau Ng"
	replace dob2 = "10 Jan 1998" if dob2=="" & fullname2=="Shilin Xu"
	*Each player must carry a single date of birth across all of her matches
	forvalues p = 1/2 {
		bysort fullname`p': assert dob`p'==dob`p'[1]
	}

	*Z-scores of ranking points, to account for inflation in WTA ranking points circa 2008/9 (see Figure 2 caption)
	merge m:1 month year using data_WTA_ranking_mean_SD_2004_2016, keep(1 3) nogen
	forvalues p = 1/2 {
		generate pts_normalized`p' = (pts`p'-meanPts)/sdPts
	}
	drop meanPts sdPts month

	*Dates of birth: string to Stata date, keeping the names dob1 and dob2
	forvalues p = 1/2 {
		generate dobn`p' = date(dob`p',"DMY")
	}
	format dobn1 dobn2 %d
	drop dob?
	rename (dobn1 dobn2) (dob1 dob2)

	*Time of day and calendar date of the match; month is now the calendar month read from the timestamp string
	split datehour
	split datehour2, parse(":")
	destring datehour21 datehour22, generate(hour minutes)
	replace hour = hour				//Singapore time = Beijing time, so no shift is applied
	generate day   = real(substr(datehour,1,2))
	generate month = real(substr(datehour,4,2))
	generate date  = mdy(month, day, year)
	format date %d
	drop datehour*

	*Betting odds for 2008 were missing in original flashscore.com data so obtain from tennis-data.co.uk
	destring bet1 bet2, replace force
	rename (bet1 bet2) (bet1_temp bet2_temp)
	merge 1:1 matchid using data_bettingOdds_China_2008, assert(1 3) nogen
	assert (bet1!=. & bet2!=.) if year==2008 & matchid!="booNT52q"
	assert (bet1==. & bet2==.) if year!=2008
	forvalues p = 1/2 {
		replace bet`p' = bet`p'_temp if year!=2008
	}
	drop bet1_temp bet2_temp
	tabulate year if bet1==. | bet2==.
	tabulate year if bet1!=. & qualify==0
	tabulate year if bet2!=. & qualify==0

	*Sets 1 and 2: the first character of set?won? is the games won; any remaining characters are the tie-break score
	tostring set1won2 set2won2 set3won2 set1won1 set2won1 set3won1, replace
	forvalues p = 1/2 {
		forvalues s = 1/2 {
			generate set`s'game`p'     = real(substr(set`s'won`p',1,1))
			generate set`s'tiebreak`p' = real(substr(set`s'won`p',2,length(set`s'won`p')-1))
		}
	}
	*Set 3 is kept whole because it can go to tie-breaker when not a Grand Slam
	rename (set3won1 set3won2) (set3game1 set3game2)
	destring set3game1 set3game2, replace
	drop set?won?

	drop name? matchduration set?duration link?

	order matchid city round qualify date year month day hour minutes matchstatus score1 score2 ///
		set1game1 set1tiebreak1 set1game2 set1tiebreak2 set2game1 set2tiebreak1 set2game2 set2tiebreak2 set3game1 set3game2 ///
		fullname1 fullname2 ioc1 ioc2 dob1 dob2 rank1 rank2 pts1 pts2 pts_normalized1 pts_normalized2 bet1 bet2
	sort city date year month day hour minutes

	save $tmp\ChinaContest, replace
end

*Execute contestChina, then delete the temporary ranking file (tmp_ranking) that its two ranking merges read
contestChina
erase $tmp\tmp_ranking.dta	


	
capture program drop combineContestEnvironment
program define combineContestEnvironment
	/*
	Builds the pooled match-level analysis file and saves it as PooledAnalysis in the temporary folder (global tmp).
	Steps:
	  1. Reshape nominal prize money to city-series-round rows; rescale the US CPI so that its last month equals 1.
	  2. Merge each contest file (Australia, China) with its environment file and append the two.
	  3. Code city and round identifiers; attach prize money and deflate it by the CPI.
	  4. Build pollution and temperature bins, a rain indicator, match-outcome variables,
	     relative-strength measures (betting odds, rank, ranking points), hour bins and player ages.
	Every intermediate file is written to the temporary folder. The original routed tmp_prizeSpreads through a
	global named dta, which the visible top of this file never defines; it now uses the same temporary folder as the rest.
	*/

	*Match prize money in nominal US$
	import excel using "data_WTA_cashPrize_BLS_CPI_2004_2016.xlsx", sheet("CashUSD_wta") cellrange(A2:W43) firstrow clear
	keep city yearSeries p*
	reshape long p, i(city yearSeries) j(round)
	ren p prizeCash
	drop if prizeCash==.
	save $tmp\tmp_prizeSpreads, replace
	*US CPI, rescaled so that the last month in the sheet (December 2016, per the variable name) equals 1
	import excel using "data_WTA_cashPrize_BLS_CPI_2004_2016.xlsx", sheet("cpi_us_bls") cellrange(A2:M15) firstrow clear
	reshape long cpi, i(year) j(month)
	sort year month
	ren year yearSeries
	gen cpi201612=cpi[_N]
	replace cpi=cpi/cpi201612
	drop cpi201612
	save $tmp\tmp_cpi, replace
	
	
	*Attach environmental conditions to each match: Australia by date and hour, China by city, date and hour
	use $tmp\AustraliaContest, clear
	merge m:1 date hour using $tmp\AustraliaEnvironment, keep(1 3) nogen	
	save $tmp\AustraliaAnalysis, replace
	
	use $tmp\ChinaContest, clear
	merge m:1 city date hour using $tmp\ChinaEnvironment, keep(1 3) nogen	
	
	append using $tmp\AustraliaAnalysis
	erase $tmp\AustraliaAnalysis.dta
	
	
	gen cityID=1							//By order of prestige (prize money): Melbourne, Beijing, Wuhan, Rest of China: By alphabetic order
	replace cityID=2 if city=="beijing"
	replace cityID=3 if city=="wuhan"
	replace cityID=4 if city=="guangzhou"
	replace cityID=5 if city=="hongkong"
	replace cityID=6 if city=="shenzhen"
	replace cityID=7 if city=="tianjin"

	gen     rid = .							//Round identifier (coded manually to make numeric variable easier to interpret)
	replace rid = 1   if round=="Final"
	replace rid = 2   if round=="Semi-finals"
	replace rid = 4   if round=="Quarter-finals"
	replace rid = 8   if round=="1/8-finals"
	replace rid = 16  if round=="1/16-finals"
	replace rid = 32  if round=="1/32-finals"
	replace rid = 64  if round=="1/64-finals"
	replace rid = 100 if round=="Q-3R"
	replace rid = 101 if round=="Q-2R"
	replace rid = 102 if round=="Q-1R"		//Q-1R precedes Q-2R which precedes Q-3R
	label define roundno 1 "Final = 1" 2 "Semi-finals = 2" 4 "Quarter-finals = 4" 8 "1/8-finals = 8" 16 "1/16-finals = 16" 32 "1/32-finals = 32" 64 "1/64-finals = 64" 100 "Q-3R = 100" 101 "Q-2R = 101" 102 "Q-1R = 102"
	label values rid roundno
	drop round
	ren rid round
	order round, after(matchid)
	
	*Shenzhen's qualifying and round-16 matches played on December 29-31 2012 refer to the 2013 WTA tournament series
	gen yearSeries = year
	replace yearSeries = 2013 if city=="shenzhen"&year==2012&month==12
		
	
	*Prize money is matched on city, series year and round; only matched rows are kept
	merge m:1 city yearSeries round using $tmp\tmp_prizeSpreads, assert(2 3) keep(3) nogen	//We observe qualifying matches from 2009 on in Melbourne and from 2010 on in Beijing. We miss qualifying and round 16 matches in 2014 in Shenzhen. 
	erase $tmp\tmp_prizeSpreads.dta
	*NOTE(review): the CPI is matched on yearSeries and month, so the Shenzhen matches recoded above pick up the December 2013 index - confirm this is intended
	merge m:1 yearSeries month using $tmp\tmp_cpi, assert(2 3) keep(3) nogen
	erase $tmp\tmp_cpi.dta
	replace prizeCash=prizeCash/cpi			//Cash prize is now in constant Dec-2016 US dollars	
	drop cpi
		
	
	*Particle pollution
	summ api_t_f2 year if city=="melbourne"						//Australia: 1-h API is available throughout; 1-h PM2.5 is available only from 2015  
	summ pm25_t_f2 year if city=="melbourne" & pm25_t_f2~=.		//Because maximum 1-h API is 5.1 in the Australia sample, 1-h PM2.5 never reaches 100 ug/m3 (see the end of Section 2 of the article)
	gen pm25per100_t_f2 = pm25_t_f2/100
	summ pm25per100_t_f2, detail
	*Bins (first matching rule wins at shared boundaries): [0,1], (1,1.5], (1.5,2], (2,maximum].
	*The keyword max bounds the top bin, so the maximum need not pass through a local macro (which stores a rounded text copy)
	recode pm25per100_t_f2 (0/1=1) (1/1.5=2) (1.5/2=3) (2/max=4), gen(pm25_bin)
	replace pm25_bin=1 if city=="melbourne"&pm25_bin==.			//Again, because maximum 1-h API is 5.1 in the Australia sample, 1-h PM2.5 never reaches 100 ug/m3 (see the end of Section 2 of the article)
	xi i.pm25_bin, pre(P)
	*Group some pollution bins
	gen Ppm25_bin_234 = Ppm25_bin_2+Ppm25_bin_3+Ppm25_bin_4		//>100 ug/m3
	gen Ppm25_bin_23  = Ppm25_bin_2+Ppm25_bin_3					//(100,200] ug/m3
	gen Ppm25_bin_34  =             Ppm25_bin_3+Ppm25_bin_4		//>150 ug/m3
	
	*Temperature: bins [0,27], (27,29], (29,34], (34,maximum]
	summ tp_t_f2, detail
	recode tp_t_f2 (0/27=1) (27/29=2) (29/34=3) (34/max=4), gen(tp_bin)
	xi i.tp_bin, pre(T)
	*Group some temperature bins
	gen Ttp_bin_234 = Ttp_bin_2+Ttp_bin_3+Ttp_bin_4				//>27 C
	gen Ttp_bin_34  =           Ttp_bin_3+Ttp_bin_4				//>29 C
	
	*Indicator for any rain
	gen dv_rain=(pp_t_f2>0) if pp_t_f2~=.
 	
	
	egen totalSets = rowtotal(score?) if matchstatus=="Finished"	//Total sets for completed matches, missing for matches with withdrawals. Variables score1 and score2 denote the number of sets won by players 1 and 2, respectively.
	
	*matchComplete=1 <=> totalSets~=. & matchComplete=0 <=> totalSets==.
	gen  matchComplete = (max(set2game1,set2game2)>=6 & totalSets==2) | (max(set3game1,set3game2)>=6 & totalSets==3)
	assert matchComplete==1 if totalSets~=.
	assert totalSets~=. if matchComplete==1
	assert matchComplete==0 if totalSets==.
	assert totalSets==. if matchComplete==0
	
	gen threeSetter = totalSets==3 if matchComplete==1
	egen set1Games = rowtotal(set1game?) if matchComplete==1
	egen set2Games = rowtotal(set2game?) if matchComplete==1
	egen set3Games = rowtotal(set3game?) if matchComplete==1&threeSetter==1		//A Grand Slam does not go to tie-breaker in a third set
	replace set3Games=13 if city~="melbourne"&(set3game1>7|set3game2>7)&matchComplete==1&threeSetter==1 	//Unlike Grand Slam matches (in Melbourne), third sets in matches played in China may go to tie-breaker
	egen totalGames = rowtotal(set?Games) if matchComplete==1
	gen incrGames_p1 = set2game1 - set1game1 if matchComplete==1		//How many more games player 1 wins in set 2 over set 1 (missing if match was not completed)
	gen incrGames_p2 = set2game2 - set1game2 if matchComplete==1		//How many more games player 2 wins in set 2 over set 1 (missing if match was not completed)
	gen incrGames_set1loser = incrGames_p1*(set1game1<set1game2) + incrGames_p2*(set1game1>set1game2) if matchComplete==1		//Increase in games won in set 2 versus set 1 for set 1 loser (missing if match was not completed)
	drop incrGames_p1 incrGames_p2
	gen winMatch_set1loser = (set1game1<set1game2 & score1>score2) | (set1game1>set1game2 & score1<score2) if matchComplete==1		//Indicator for set 1 loser winning the match (missing if match was not completed)
		
		
	*[1] Relative strength: winning probabilities (in percent) derived from paired players' pre-match betting odds, bet1 and bet2; generate difference in winning probabilities
	gen winProb_p1 = 100/bet1 if bet1~=.&bet2~=.
	gen winProb_p2 = 100/bet2 if bet1~=.&bet2~=.
	gen winProb_diff = abs(winProb_p1 - winProb_p2)
	foreach x in 10 15 20 25 30 {
		gen winProb_diffLess`x'=(winProb_diff<=`x') if winProb_diff~=.
		}
	foreach x in 65 {
		gen winProb_diffMore`x'=(winProb_diff>=`x') if winProb_diff~=.
		}
	gen winProb_pMax = max(winProb_p1,winProb_p2) if bet1~=.&bet2~=.
	*Indicator for the match's underdog, according to betting markets, winning the match (among completed matches for which there was an underdog)
	gen underdogBet_wonM  = ((score1>score2       & bet1>bet2) | (score1<score2       & bet1<bet2)) if matchComplete==1&bet1~=.&bet2~=.&bet1~=bet2
	*Indicator for the match's underdog, according to betting markets, winning set 1 (among completed matches for which there was an underdog)
	gen underdogBet_wonS1 = ((set1game1>set1game2 & bet1>bet2) | (set1game1<set1game2 & bet1<bet2)) if matchComplete==1&bet1~=.&bet2~=.&bet1~=bet2
	*Indicator for the match's favorite, according to betting markets, winning set 1 (among completed matches for which there was an underdog)
	gen favoriteBet_wonS1 = ((set1game1>set1game2 & bet1<bet2) | (set1game1<set1game2 & bet1>bet2)) if matchComplete==1&bet1~=.&bet2~=.&bet1~=bet2
		
	*Ex-ante winning probability of the match winner (missing if match was not completed)
	gen winProb_wonM = winProb_p1*(score1>score2) + winProb_p2*(score1<score2) if matchComplete==1

	
	*[2] Relative strength: from paired players' WTA ranks, rank1 and rank2; generate difference in rank (linear metric, log of measure)
	gen rank_diff=abs(rank1-rank2)
	gen rank_lndiff=log(rank_diff)
	*Indicator for the match's underdog, according to rank, winning the match (among completed matches for which there was an underdog)
	gen underdogRank_wonM  = ((score1>score2 & rank1>rank2) | (score1<score2 & rank1<rank2)) if matchComplete==1&rank1~=.&rank2~=.&rank1~=rank2
	
	
	*[3] Relative strength: from paired players' WTA ranking points and z-scores; generate difference in pts
	gen pts_diff=abs(pts1-pts2)
	gen pts_normal_diff=abs(pts_normalized1-pts_normalized2)
	*Indicator for the match's underdog, according to pts, winning the match (among completed matches for which there was an underdog)
	gen underdogPts_wonM  = ((score1>score2 & pts1<pts2) | (score1<score2 & pts1>pts2)) if matchComplete==1&pts1~=.&pts2~=.&pts1~=pts2
	
	
	*Rank and ranking points have weaker predictive power than betting markets (see footnote 14 in the article)
	reg underdogBet_wonM winProb_diff
	reg underdogRank_wonM rank_diff
	reg underdogRank_wonM rank_lndiff
	reg underdogPts_wonM pts_normal_diff
	
	
	label var matchComplete  		"Match is completed (yes=1)"
	label var totalSets 			"Total sets for completed matches"
	label var totalGames 			"Total games for completed matches"
	label var set1Games    			"Total set 1 games for completed matches"
	label var set2Games    			"Total set 2 games for completed matches"
	label var set3Games    			"Total set 3 games for completed matches"
	label var threeSetter   		"Match is completed in 3 sets (yes=1)"
	label var incrGames_set1loser   "Increase in games won in set 2 versus set 1 for set 1 loser (completed matches)"
	label var winMatch_set1loser   	"Set 1 loser wins the match (yes=1)"
	
	label var winProb_diff	        "Diff. in winning probability between the two players (%, absolute)"
	foreach x in 10 15 20 25 30 {
	  label var winProb_diffLess`x' "Diff. in players' winning probabilities <= `x'% (yes=1)"
	  }
	foreach x in 65 {
	  label var winProb_diffMore`x' "Diff. in players' winning probabilities >= `x'% (yes=1)"
	  }
	label var underdogBet_wonM      "Underdog, according to betting markets, wins match (yes=1)"
	label var underdogBet_wonS1     "Underdog, according to betting markets, wins set 1 (yes=1)"
	label var favoriteBet_wonS1		"Favorite, according to betting markets, wins set 1 (yes=1)"
	label var winProb_wonM          "Winning probability for the match winner (completed matches)"
	label var rank_diff				"Diff. in WTA rank between the two players"
	label var pts_diff				"Diff. in WTA ranking points between the two players"
	label var pts_normal_diff		"Diff. in WTA z-score between the two players"
    
	
	*Hour bins (three-hour blocks of the day)
	gen hour_bin=floor(hour/3)
	
	*Player age and paired players' age difference, in years
	gen age_diff = abs(dob1-dob2)/365.25
	gen age_p1 = (date - dob1)/365.25
	gen age_p2 = (date - dob2)/365.25
		
	save $tmp\PooledAnalysis, replace
end
	
*Execute combineContestEnvironment, whose last step saves PooledAnalysis in the temporary folder
combineContestEnvironment	
	
	
	
capture program drop descriptives
program define descriptives
	use $tmp\PooledAnalysis, clear
	
	
	*Tables 1 and A.1 (and some statistics reported in Section 2 of the article)
	 global qualifier "qualify==0"		//Table 1; see program reducedForm (specifically, code replicating Table A.3) for total points played in the match
	*global qualifier "qualify~=."		//Table A.1: Extended sample with qualifying matches (for which data are available)
	preserve
        keep if city=="melbourne"&$qualifier
		keep  winProb_diff rank_diff pts_diff age_diff matchComplete prizeCash set1Games set2Games incrGames_set1loser threeSetter winMatch_set1loser set3Games hour api_t_f2 pm25_t_f2 tp_t_f2 hm_t_f2 wsp_t_f2 dv_rain
		order winProb_diff rank_diff pts_diff age_diff matchComplete prizeCash set1Games set2Games incrGames_set1loser threeSetter winMatch_set1loser set3Games hour api_t_f2 pm25_t_f2 tp_t_f2 hm_t_f2 wsp_t_f2 dv_rain
		outreg2 using $tmp\melbourneStats.tex, eqkeep(N mean sd min max) sum(detail) bdec(2) replace label excel tex
    restore
	preserve
        keep if city=="beijing"&$qualifier
		keep  winProb_diff rank_diff pts_diff age_diff matchComplete prizeCash set1Games set2Games incrGames_set1loser threeSetter winMatch_set1loser set3Games hour pm25_t_f2 tp_t_f2 hm_t_f2 wsp_t_f2 dv_rain
		order winProb_diff rank_diff pts_diff age_diff matchComplete prizeCash set1Games set2Games incrGames_set1loser threeSetter winMatch_set1loser set3Games hour pm25_t_f2 tp_t_f2 hm_t_f2 wsp_t_f2 dv_rain
		outreg2 using $tmp\beijingStats.tex, eqkeep(N mean sd min max) sum(detail) bdec(2) replace label excel tex
    restore
	preserve
        keep if city~="melbourne"&city~="beijing"&$qualifier
		keep  winProb_diff rank_diff pts_diff age_diff matchComplete prizeCash set1Games set2Games incrGames_set1loser threeSetter winMatch_set1loser set3Games hour pm25_t_f2 tp_t_f2 hm_t_f2 wsp_t_f2 dv_rain
		order winProb_diff rank_diff pts_diff age_diff matchComplete prizeCash set1Games set2Games incrGames_set1loser threeSetter winMatch_set1loser set3Games hour pm25_t_f2 tp_t_f2 hm_t_f2 wsp_t_f2 dv_rain
		outreg2 using $tmp\otherChinaStats.tex, eqkeep(N mean sd min max) sum(detail) bdec(2) replace label excel tex
    restore
	summ set1Games set2Games set3Games if qualify==0
	summ set1Games set2Games set3Games if qualify==0 & set3Games~=.
		
		
	*Figure 1
	preserve
        capture drop parameter density_param
		gen parameter = .5*_n in 10/90
		kdensity tp_t_f2 if city=="melbourne"&qualify==0, k(gau) at(parameter) gen(density_param)
		label var parameter "Temperature during match ({superscript:O}C)"
		label var density_param "Density over matches"
		line density_param parameter, xlab(5(5)45) lwidth(medthick) scheme(s1manual)
		*graph export $tmp\melbourneTemperature.eps, replace
    restore
	preserve
        capture drop parameter density_param
		gen parameter = .5*_n in 10/90
		kdensity tp_t_f2 if city=="beijing"&qualify==0, k(gau) at(parameter) gen(density_param)
		label var parameter "Temperature during match ({superscript:O}C)"
		label var density_param "Density over matches"
		line density_param parameter, xlab(5(5)45) lwidth(medthick) scheme(s1manual)
		*graph export $tmp\beijingTemperature.eps, replace
	restore
	preserve
        capture drop parameter density_param
		gen parameter = .5*_n in 10/90
		kdensity tp_t_f2 if city~="melbourne"&city~="beijing"&qualify==0, k(gau) at(parameter) gen(density_param)
		label var parameter "Temperature during match ({superscript:O}C)"
		label var density_param "Density over matches"
		line density_param parameter, xlab(5(5)45) lwidth(medthick) scheme(s1manual)
		*graph export $tmp\otherChinaTemperature.eps, replace
	restore
	preserve
        capture drop parameter density_param
		gen parameter = 2*(_n-1) in 1/261
		kdensity pm25_t_f2 if city=="melbourne"&qualify==0, k(gau) at(parameter) gen(density_param)
		label var parameter "PM2.5 concentration during match ({&mu}g/m{superscript:3})"
		label var density_param "Density over matches"
		line density_param parameter if parameter<350, xlab(12 35 50(50)350) lwidth(medthick) xline(12) xline(35) scheme(s1manual)
		*graph export $tmp\melbournePM25.eps, replace
    restore
	preserve
        capture drop parameter density_param
		gen parameter = 2*(_n-1) in 1/261
		kdensity pm25_t_f2 if city=="beijing"&qualify==0, k(gau) at(parameter) gen(density_param)
		label var parameter "PM2.5 concentration during match ({&mu}g/m{superscript:3})"
		label var density_param "Density over matches"
		line density_param parameter if parameter<350, xlab(12 35 50(50)350) lwidth(medthick) xline(12) xline(35) scheme(s1manual)
		*graph export $tmp\beijingPM25.eps, replace
	restore
	preserve
        capture drop parameter density_param
		gen parameter = 2*(_n-1) in 1/261
		kdensity pm25_t_f2 if city~="melbourne"&city~="beijing"&qualify==0, k(gau) at(parameter) gen(density_param)
		label var parameter "PM2.5 concentration during match ({&mu}g/m{superscript:3})"
		label var density_param "Density over matches"
		line density_param parameter if parameter<350, xlab(12 35 50(50)350) lwidth(medthick) xline(12) xline(35) scheme(s1manual)
		*graph export $tmp\otherChinaPM25.eps, replace
	restore
	
	
	*Figure 2 (and material supporting statements in Sections 2, 3, and 5 of the article, as well as our response to the editor and to reviewers)
	preserve
		keep if qualify==0
		sum age_diff if qualify==0, det		//Footnote 17 of the article
		keep matchid round city date yearSeries rank1 rank2 pts_normalized1 pts_normalized2 age_p1 age_p2
		twoway (scatter rank1 age_p1) (scatter rank2 age_p2)	//Section 5.3 under age heterogeneity (professionals who are active in this labor market over longer careers tend to be better ranked)
		reshape long rank pts_normalized age_p, i(matchid round city date yearSeries) j(player)
		sum age_p, detail					//Footnote 17 of the article

		
		*WTA rank
		sum rank if city=="melbourne"                , det		//Statistics stated in Section 2 of the article
		sum rank if city=="beijing"                  , det
		sum rank if city~="melbourne"&city~="beijing", det		
        
		capture drop parameter density_param
		gen parameter = 2*_n-1 in 1/608
		label var parameter "WTA rank of player in match"
		
		kdensity rank if city=="melbourne", k(gau) at(parameter) gen(density_param)
		label var density_param "Density over player-match observations"
		line density_param parameter, xlab(1 5 10 20 50 100 200 500 1000) xscale(log) lwidth(medthick) scheme(s1manual)
		*graph export $tmp\melbourneRank.eps, replace

		sum rank if city=="melbourne" & round<=32, det
		kdensity rank if city=="melbourne" & round<=32, k(gau) at(parameter) gen(density_param_noR64)
		kdensity rank if city=="beijing"              , k(gau) at(parameter) gen(density_param_bj)
		label var density_param   	  "Melbourne"
		label var density_param_noR64 "Melbourne (less round 64)"
		label var density_param_bj    "Beijing"
		twoway (line density_param parameter if parameter<=201, lwidth(thick)) (line density_param_noR64 parameter if parameter<=201, lwidth(medthick)) ///
		       (line density_param_bj parameter if parameter<=201), xlabel(1(20)201) scheme(s1manual)
		
		kdensity rank if city=="melbourne" & (yearS==2006|yearS==2009|yearS==2014), k(gau) at(parameter) gen(density_param_hot)		//Statistics stated at the end of Section 3 of the article
		kdensity rank if city=="melbourne" & (yearS==2004|yearS==2011|yearS==2015), k(gau) at(parameter) gen(density_param_cool)
		label var density_param_hot  "Melbourne, hotter events"
		label var density_param_cool "Melbourne, less-hot events"
		twoway (line density_param_hot parameter if parameter<=201, lwidth(thick)) (line density_param_cool parameter if parameter<=201, lwidth(medthick)), xlabel(1(20)201) scheme(s1manual)
		
		capture drop parameter density_param
		gen parameter = 2*_n-1 in 1/608
		kdensity rank if city=="beijing", k(gau) at(parameter) gen(density_param)
		label var parameter "WTA rank of player in match"
		label var density_param "Density over player-match observations"
		line density_param parameter, xlab(1 5 10 20 50 100 200 500 1000) xscale(log) lwidth(medthick) scheme(s1manual)
		*graph export $tmp\beijingRank.eps, replace
		
		kdensity rank if city=="beijing" & (yearS==2010|yearS==2011|yearS==2013), k(gau) at(parameter) gen(density_param_dirty)
		kdensity rank if city=="beijing" & (yearS==2008|yearS==2012|yearS==2016), k(gau) at(parameter) gen(density_param_clean)
		label var density_param_dirty "Beijing, more polluted"
		label var density_param_clean "Beijing, less polluted"
		twoway (line density_param_dirty parameter if parameter<=201, lwidth(thick)) (line density_param_clean parameter if parameter<=201, lwidth(medthick)), xlabel(1(20)201) scheme(s1manual)
		
		capture drop parameter density_param
		gen parameter = 2*_n-1 in 1/608
		kdensity rank if city~="melbourne"&city~="beijing", k(gau) at(parameter) gen(density_param)
		label var parameter "WTA rank of player in match"
		label var density_param "Density over player-match observations"
		line density_param parameter, xlab(1 5 10 20 50 100 200 500 1000) xscale(log) lwidth(medthick) scheme(s1manual)
		*graph export $tmp\otherChinaRank.eps, replace
		
		*WTA z-score
        capture drop parameter density_param
		gen parameter = .25*(_n-3) in 1/79
		kdensity pts_normalized if city=="melbourne", k(gau) at(parameter) gen(density_param)
		label var parameter "WTA ranking points of match player (time-specific world z-score)"
		label var density_param "Density over player-match observations"
		line density_param parameter, xlab(-1(1)19) lwidth(medthick) scheme(s1manual)
		*graph export $tmp\melbourneNormalizedPts.eps, replace

		capture drop parameter density_param
		gen parameter = .25*(_n-3) in 1/79
		kdensity pts_normalized if city=="beijing", k(gau) at(parameter) gen(density_param)
		label var parameter "WTA ranking points of match player (time-specific world z-score)"
		label var density_param "Density over player-match observations"
		line density_param parameter, xlab(-1(1)19) lwidth(medthick) scheme(s1manual)
		*graph export $tmp\beijingNormalizedPts.eps, replace
	
		capture drop parameter density_param
		gen parameter = .25*(_n-3) in 1/79
		kdensity pts_normalized if city~="melbourne"&city~="beijing", k(gau) at(parameter) gen(density_param)
		label var parameter "WTA ranking points of match player (time-specific world z-score)"
		label var density_param "Density over player-match observations"
		line density_param parameter, xlab(-1(1)19) lwidth(medthick) scheme(s1manual)
		*graph export $tmp\otherChinaNormalizedPts.eps, replace
	restore

	
	*Figure 3
	preserve
		keep if qualify==0
		
		*Winning probability difference
        capture drop parameter density_param
		gen parameter = _n-1 in 1/97
		kdensity winProb_diff if $qualifier, k(gau) at(parameter) gen(density_param)
		label var parameter "Difference in opponents' pre-match winning probability (%)"
		label var density_param "Density over matches"
		line density_param parameter, xlab(0(10)100) lwidth(medthick) scheme(s1manual)
		*graph export $tmp\DifferenceWinProb.eps, replace
				
		*WTA rank difference
        capture drop parameter density_param
		gen parameter = 2*(_n-1) in 1/601
		kdensity rank_diff if $qualifier, k(gau) at(parameter) gen(density_param)
		label var parameter "Difference in opponents' WTA rank"
		label var density_param "Density over matches"
		line density_param parameter if parameter>0, xlab(5 10 20 50 100 200 500 1000) xscale(log) lwidth(medthick) scheme(s1manual)
		*graph export $tmp\DifferenceRank.eps, replace
		
		*WTA z-score difference
        capture drop parameter density_param
		gen parameter = .25*(_n-1) in 1/75
		kdensity pts_normal_diff if $qualifier, k(gau) at(parameter) gen(density_param)
		label var parameter "Difference in opponents' WTA ranking points (in pop. std. dev.)"
		label var density_param "Density over matches"
		line density_param parameter, xlab(0(1)19) lwidth(medthick) scheme(s1manual)
		*graph export $tmp\DifferenceNormalizedPts.eps, replace
	restore
	
	
	*Figure 4 (and statistics stated in Section 2 of the article)
	correl winProb_diff rank_lndiff if qualify==0
	correl winProb_diff pts_normal_diff if qualify==0
	
	*Against winning probability difference
	preserve
		keep if qualify==0
		gen heterog=winProb_diff			
		*Note underdog was defined only for heterog>0 and for completed matches
			gen underdog_wonM=underdogBet_wonM
			local num_bin    =10
		summ heterog if heterog~=0, det
		local lb=r(min)
		local ub=r(p95)
		local width=(`ub'-`lb')/`num_bin'
		gen bin=.
		foreach i of numlist 1/`num_bin' {
			replace bin=`lb'+`width'/2+(`i'-1)*`width' if heterog>=(`lb'+(`i'-1)*`width') & heterog<(`lb'+`i'*`width')
			}
		tab bin
		drop if bin==.
		gen matchCount=1 if matchComplete==1
		*Activate the following command to restrict to Temperature > 27 C or PM2.5 > 100 while keeping the bins for the full sample
		*keep if Ttp_bin_234==1|Ppm25_bin_234==1
		collapse (sum) underdog_wonM matchCount, by(bin)
		gen favorite_wonM=1-underdog_wonM/matchCount
		gen std_err=(favorite_wonM * (1-favorite_wonM) / matchCount) ^ .5
		mkmat favorite_wonM std_err, matrix(pointEst_stdErr)
		mat tmp = J(3,10,0)
		mat tmp[1,1] = pointEst_stdErr[1..10,1]'
		mat tmp[2,1] = pointEst_stdErr[1..10,1]'-invnormal(.975)*pointEst_stdErr[1..10,2]'
		mat tmp[3,1] = pointEst_stdErr[1..10,1]'+invnormal(.975)*pointEst_stdErr[1..10,2]'
		gen bin_str=string(round(bin,1))
		mat colnames tmp = "6" "15" "23" "32" "40" "49" "57" "66" "74" "83"
		coefplot matrix(tmp[1,.]), ci((tmp[2,.] tmp[3,.])) ylabel(0(.2)1) xlabel() vertical mlabsize(minuscule) scheme(s1manual) ytitle("Proportion of matches won by the stronger player") xtitle("Difference in opponents' pre-match winning probability (%)")
		*graph export $tmp\FavWins_DiffWinProb_wCI.eps, replace
		*graph export $tmp\FavWins_DiffWinProb_wCI_27_100.eps, replace
	restore
	
	*Against WTA rank difference
	preserve
		keep if qualify==0
		gen heterog=rank_diff			
		*Note underdog was defined only for heterog>0 and for completed matches
			gen underdog_wonM=underdogRank_wonM
			local num_bin    =10
		summ heterog if heterog~=0, det
		local lb=r(min)
		local ub=r(p95)
		local width=(`ub'-`lb')/`num_bin'
		gen bin=.
		foreach i of numlist 1/`num_bin' {
			replace bin=`lb'+`width'/2+(`i'-1)*`width' if heterog>=(`lb'+(`i'-1)*`width') & heterog<(`lb'+`i'*`width')
			}
		tab bin
		drop if bin==.
		gen matchCount=1 if matchComplete==1
		*Activate the following command to restrict to Temperature > 27 C or PM2.5 > 100 while keeping the bins for the full sample
		*keep if Ttp_bin_234==1|Ppm25_bin_234==1
		collapse (sum) underdog_wonM matchCount, by(bin)
		gen favorite_wonM=1-underdog_wonM/matchCount
		gen std_err=(favorite_wonM * (1-favorite_wonM) / matchCount) ^ .5
		mkmat favorite_wonM std_err, matrix(pointEst_stdErr)
		mat tmp = J(3,10,0)
		mat tmp[1,1] = pointEst_stdErr[1..10,1]'
		mat tmp[2,1] = pointEst_stdErr[1..10,1]'-invnormal(.975)*pointEst_stdErr[1..10,2]'
		mat tmp[3,1] = pointEst_stdErr[1..10,1]'+invnormal(.975)*pointEst_stdErr[1..10,2]'
		gen bin_str=string(round(bin,1))
		mat colnames tmp = "10" "29" "48" "66" "85" "103" "122" "141" "159" "178"
		coefplot matrix(tmp[1,.]), ci((tmp[2,.] tmp[3,.])) ylabel(0(.2)1) xlabel() vertical mlabsize(minuscule) scheme(s1manual) ytitle("Proportion of matches won by the stronger player") xtitle("Difference in opponents' WTA rank")
		*graph export $tmp\FavWins_DiffRank_wCI.eps, replace
		*graph export $tmp\FavWins_DiffRank_wCI_27_100.eps, replace
	restore
		
	*Against WTA z-score difference
	preserve
		keep if qualify==0
		gen heterog=pts_normal_diff			
		*Note underdog was defined only for heterog>0 and for completed matches
			gen underdog_wonM=underdogPts_wonM
			local num_bin    =10
		summ heterog if heterog~=0, det
		local lb=r(min)
		local ub=r(p95)
		local width=(`ub'-`lb')/`num_bin'
		gen bin=.
		foreach i of numlist 1/`num_bin' {
			replace bin=`lb'+`width'/2+(`i'-1)*`width' if heterog>=(`lb'+(`i'-1)*`width') & heterog<(`lb'+`i'*`width')
			}
		tab bin
		drop if bin==.
		gen matchCount=1 if matchComplete==1
		*Activate the following command to restrict to Temperature > 27 C or PM2.5 > 100 while keeping the bins for the full sample
		*keep if Ttp_bin_234==1|Ppm25_bin_234==1
		collapse (sum) underdog_wonM matchCount, by(bin)
		gen favorite_wonM=1-underdog_wonM/matchCount
		gen std_err=(favorite_wonM * (1-favorite_wonM) / matchCount) ^ .5
		mkmat favorite_wonM std_err, matrix(pointEst_stdErr)
		mat tmp = J(3,10,0)
		mat tmp[1,1] = pointEst_stdErr[1..10,1]'
		mat tmp[2,1] = pointEst_stdErr[1..10,1]'-invnormal(.975)*pointEst_stdErr[1..10,2]'
		mat tmp[3,1] = pointEst_stdErr[1..10,1]'+invnormal(.975)*pointEst_stdErr[1..10,2]'
		gen bin_str=string(round(bin,.1))
		mat colnames tmp = "0.5" "1.5" "2.4" "3.4" "4.4" "5.4" "6.3" "7.3" "8.3" "9.2"
		coefplot matrix(tmp[1,.]), ci((tmp[2,.] tmp[3,.])) ylabel(0(.2)1) xlabel() vertical mlabsize(minuscule) scheme(s1manual) ytitle("Proportion of matches won by the stronger player") xtitle("Difference in opponents' WTA ranking points (in pop. std. dev.)")
		*graph export $tmp\FavWins_DiffNormalPts_wCI.eps, replace
		*graph export $tmp\FavWins_DiffNormalPts_wCI_27_100.eps, replace
	restore
	
	
	*Footnote 16 of the article 
	preserve		/*By early-in-series round type vs. late-in-series round type*/
		keep if matchComplete==1 & qualify==0 & winProb_diffLess25==1
		gen earlyRound=(round>=32 & round<=64)
		collapse (count) N=threeSetter (mean) threeSetter winProb_diff, by(earlyRound) 
		gen se_proportion = sqrt(threeSetter*(1-threeSetter)/N)
	restore
	preserve		/*By WTA series type (rounds 16 to 1 only since not all series have rounds 32 let alone 64)*/
		keep if matchComplete==1 & qualify==0 & winProb_diffLess25==1
		drop if round>=32 & round<=64
		replace city="other China" if city~="melbourne"&city~="beijing"
		collapse (count) N=threeSetter (mean) threeSetter winProb_diff, by(city) 
		gen se_proportion = sqrt(threeSetter*(1-threeSetter)/N)
	restore
	
	*Distribution (p25, p50, p75) of the winning probability difference by round
	summ winProb_diff if qualify==0, det
	bysort round: summ winProb_diff, det
	
	
	*Figure 5
	preserve
		keep if qualify==0
		*Restrict to completed matches
		keep if matchComplete==1
		*Restrict to matches for which temperature and PM2.5 bins are observed
		keep if tp_bin~=.&pm25_bin~=.
		*Symmetry (in ability) has its own bin, bin 0; `num_bin' remaining bins all the way to the maximum difference among main-draw matches (not p95), labeled at the midpoint difference
		local num_bin=9
		gen heterog=winProb_diff			
		summ heterog if heterog~=0, det
		local lb=r(min)
		local ub=r(max)
		local width=(`ub'-`lb')/`num_bin'
		gen bin=.
		foreach i of numlist 1/`num_bin' {
			replace bin=`width'/2+`lb'+(`i'-1)*`width' if heterog>=(`lb'+(`i'-1)*`width') & heterog<(`lb'+`i'*`width')
			}
		replace bin=0 if heterog==0
		drop if bin==.
		gen poorEnv=(Ttp_bin_234==1|Ppm25_bin_234==1)		/*Cutoffs 27 or 100*/
		gen mildEnv=(Ttp_bin_234==0&Ppm25_bin_234==0)
		*gen poorEnv=(Ttp_bin_234==1|Ppm25_bin_4==1)		/*Cutoffs 27 or 200*/
		*gen mildEnv=(Ttp_bin_234==0&Ppm25_bin_4==0)
		*gen poorEnv=(Ttp_bin_34==1|Ppm25_bin_234==1)		/*Cutoffs 29 or 100*/
		*gen mildEnv=(Ttp_bin_34==0&Ppm25_bin_234==0)
		*gen poorEnv=(Ttp_bin_34==1|Ppm25_bin_4==1)			/*Cutoffs 29 or 200*/
		*gen mildEnv=(Ttp_bin_34==0&Ppm25_bin_4==0)
		gen threeSetPoorEnv=(Ttp_bin_234==1|Ppm25_bin_234==1)&threeSetter==1
		gen threeSetMildEnv=(Ttp_bin_234==0&Ppm25_bin_234==0)&threeSetter==1
		*gen threeSetPoorEnv=(Ttp_bin_234==1|Ppm25_bin_4==1)&threeSetter==1
		*gen threeSetMildEnv=(Ttp_bin_234==0&Ppm25_bin_4==0)&threeSetter==1
		*gen threeSetPoorEnv=(Ttp_bin_34==1|Ppm25_bin_234==1)&threeSetter==1
		*gen threeSetMildEnv=(Ttp_bin_34==0&Ppm25_bin_234==0)&threeSetter==1
		*gen threeSetPoorEnv=(Ttp_bin_34==1|Ppm25_bin_4==1)&threeSetter==1
		*gen threeSetMildEnv=(Ttp_bin_34==0&Ppm25_bin_4==0)&threeSetter==1
		collapse (sum) poorEnv mildEnv threeSetPoorEnv threeSetMildEnv, by(bin)
		gen threeSetPropPoorEnv=threeSetPoorEnv/poorEnv
		gen threeSetPropMildEnv=threeSetMildEnv/mildEnv
		label var threeSetPropPoorEnv "> 27 {superscript:O}C or > 100 {&mu}g/m{superscript:3}"
		label var threeSetPropMildEnv "{&le} 27 {superscript:O}C and {&le} 100 {&mu}g/m{superscript:3}"
		*label var threeSetPropPoorEnv "> 27 {superscript:O}C or > 200 {&mu}g/m{superscript:3}"
		*label var threeSetPropMildEnv "{&le} 27 {superscript:O}C and {&le} 200 {&mu}g/m{superscript:3}"
		*label var threeSetPropPoorEnv "> 29 {superscript:O}C or > 100 {&mu}g/m{superscript:3}"
		*label var threeSetPropMildEnv "{&le} 29 {superscript:O}C and {&le} 100 {&mu}g/m{superscript:3}"
		*label var threeSetPropPoorEnv "> 29 {superscript:O}C or > 200 {&mu}g/m{superscript:3}"
		*label var threeSetPropMildEnv "{&le} 29 {superscript:O}C and {&le} 200 {&mu}g/m{superscript:3}"
		twoway (scatter threeSetPropPoorEnv bin, mcolor(red) msymbol(circle)) (scatter threeSetPropMildEnv bin, mcolor(forest_green) msymbol(square)), ytitle("Proportion of matches lasting three sets") ///
			 xtitle("Difference in opponents' pre-match winning probability (%)") ylabel(0(.1).5) xlab(0(10)90) legend(size(medium) nobox) scheme(s1manual)
		*graph export $tmp\ThreeSetter_DiffWinProb_27_100.eps, replace
		*graph export $tmp\ThreeSetter_DiffWinProb_27_200.eps, replace
		*graph export $tmp\ThreeSetter_DiffWinProb_29_100.eps, replace
		*graph export $tmp\ThreeSetter_DiffWinProb_29_200.eps, replace
	restore
	
	
	*Paragraph on uneven first and second battle outcomes toward the end of Section 3 of the article
	preserve
		keep if qualify==0
		gen set1unequal=(set1Games<=7) if matchComplete==1
		gen set2unequal=(set2Games<=7) if matchComplete==1
		gen poorEnvironment     = (Ttp_bin_34==1  | Ppm25_bin_234==1) if tp_bin~=.&pm25_bin~=.
		prtest set1unequal if tp_bin~=.&pm25_bin~=., by(poorEnvironment)
		prtest set2unequal if tp_bin~=.&pm25_bin~=., by(poorEnvironment)
		replace poorEnvironment = (Ttp_bin_234==1 | Ppm25_bin_234==1) if tp_bin~=.&pm25_bin~=.
		prtest set1unequal if tp_bin~=.&pm25_bin~=., by(poorEnvironment)
		prtest set2unequal if tp_bin~=.&pm25_bin~=., by(poorEnvironment)
	restore
	
	
	*Statement on withdrawals at the end of Section 3 of the article
	preserve
		keep if qualify==0
		*Retirement: 79-5 (5 are actually listed walkovers)
		count           if matchComplete==0&score1~=.&score2~=.
		tab matchstatus if matchComplete==0&score1~=.&score2~=.
		*Walkover: 10+5
		count           if matchComplete==0&score1==.&score2==.
		tab matchstatus if matchComplete==0&score1==.&score2==.
		
		summ tp_t_f2 if matchComplete==1&city=="melbourne"
		summ tp_t_f2 if matchComplete==0&city=="melbourne"
		ttest tp_t_f2 if city=="melbourne", by(matchComplete)
		
		summ pm25_t_f2 if matchComplete==1&city=="beijing"
		summ pm25_t_f2 if matchComplete==0&city=="beijing"
		ttest pm25_t_f2 if city=="beijing", by(matchComplete)
		tab matchstatus round if matchComplete==0&city=="beijing"

		summ tp_t_f2 if matchComplete==1&city~="melbourne"&city~="beijing"
		summ tp_t_f2 if matchComplete==0&city~="melbourne"&city~="beijing"
		ttest tp_t_f2 if city~="melbourne"&city~="beijing", by(matchComplete)

		summ pm25_t_f2 if matchComplete==1&city~="melbourne"&city~="beijing"
		summ pm25_t_f2 if matchComplete==0&city~="melbourne"&city~="beijing"
		ttest pm25_t_f2 if city~="melbourne"&city~="beijing", by(matchComplete)
	restore
	
	
	*Footnote 41 of the article (in the appendix)
	preserve
		keep if qualify==0
		prtest winMatch_set1loser=0.5 if totalSets==3 & winProb_diffLess25==1			//totalSets is defined only for completed matches
		gen poorEnvironment = (Ttp_bin_34==1 | Ppm25_bin_234==1) if tp_bin~=.&pm25_bin~=.
		prtest winMatch_set1loser     if totalSets==3 & winProb_diffLess25==1 & tp_bin~=.&pm25_bin~=., by(poorEnvironment)
	restore	
end

*Execute program descriptives, whose definition ends immediately above
descriptives

	

capture program drop reducedForm
program define reducedForm
	*Reduced-form estimates for Tables 2 and A.2, 3, 4 and A.3 of the article.
	*Every regression uses cluster(date) and is followed by a summary of the dependent variable
	*over the estimation sample (the mean of the dependent variable reported in the tables).
	use $tmp\PooledAnalysis, clear

	*Building blocks of the right-hand sides that recur across tables
	local envBase  "Ttp_bin_34 Ppm25_bin_234"
	local envSplit "Ttp_bin_34 Ppm25_bin_23 Ppm25_bin_4"
	local envInter "c.winProb_diff##i.Ttp_bin_34 c.winProb_diff##i.Ppm25_bin_234"
	local controls "i.cityID##i.round i.year i.hour_bin hm_t_f2 wsp_t_f2 dv_rain"
	*Matches for which both the temperature bin and the PM2.5 bin are observed
	local envSeen  "tp_bin~=.&pm25_bin~=."
	*Specifications that include winProb_diff (Tables 4 and A.3)
	local rhs1 "`envBase' winProb_diff"
	local rhs2 "`envBase' winProb_diff `controls'"
	local rhs3 "`envSplit' winProb_diff `controls'"
	local rhs4 "`envInter' `controls'"


	*Tables 2 and A.2 (and some statistics reported in the introduction of the article)
	 global qualifier "qualify==0"		//Table 2
	*global qualifier "qualify~=."		//Table A.2: Extended sample with qualifying matches, for which data are available)
	*Columns 1 to 6: same specification, one sample restriction on the winning probability difference per column
	foreach band in More65 Less30 Less25 Less20 Less15 Less10 {
		regress threeSetter `envBase' if winProb_diff`band'==1 & $qualifier, cluster(date)
		summarize `e(depvar)' if e(sample)
	}
	*Column 7
	regress threeSetter `envBase' `controls' if winProb_diffLess25==1 & $qualifier, cluster(date)
	summarize `e(depvar)' if e(sample)
	*Number of regressors: count the non-zero entries of e(b), then display that count minus one
	matrix coeff=e(b)
	local numX = 0
	forvalues j = 1/`=colsof(coeff)' {
		if coeff[1,`j']~=0 {
			local ++numX
		}
	}
	display `numX'-1
	*Column 8
	regress threeSetter `envSplit' `controls' if winProb_diffLess25==1 & $qualifier, cluster(date)
	summarize `e(depvar)' if e(sample)
	*Column 9
	regress threeSetter `envInter' `controls' if $qualifier, cluster(date)
	summarize `e(depvar)' if e(sample)

	*Two-sample test of proportions of three-set matches by poor vs. mild environment
	generate poorEnvironment = (Ttp_bin_34==1 | Ppm25_bin_234==1) if `envSeen'
	prtest threeSetter if winProb_diffLess25==1 & $qualifier & `envSeen', by(poorEnvironment)


	*Table 3
	*Columns 1 to 4 use incrGames_set1loser; columns 5 to 8 repeat the same four specifications for set1Games
	foreach outcome in incrGames_set1loser set1Games {
		*Columns 1 and 5
		regress `outcome' `envBase' if winProb_diffLess30==1 & qualify==0, cluster(date)
		summarize `e(depvar)' if e(sample)
		*Columns 2 and 6
		regress `outcome' `envBase' if winProb_diffLess25==1 & qualify==0, cluster(date)
		summarize `e(depvar)' if e(sample)
		*Columns 3 and 7
		regress `outcome' `envBase' `controls' if winProb_diffLess25==1 & qualify==0, cluster(date)
		summarize `e(depvar)' if e(sample)
		*Columns 4 and 8
		regress `outcome' `envSplit' `controls' if winProb_diffLess25==1 & qualify==0, cluster(date)
		summarize `e(depvar)' if e(sample)
	}


	*Table 4
	*Columns 1 to 4: underdog loses the match (M); columns 5 to 8: underdog loses set 1 (S1)
	foreach stage in M S1 {
		generate underdogBet_lose`stage' = 1 - underdogBet_won`stage'
		forvalues spec = 1/4 {
			regress underdogBet_lose`stage' `rhs`spec'' if qualify==0, cluster(date)
			summarize `e(depvar)' if e(sample)
		}
	}

	*Column 9
	generate winProb_wonM_prop = winProb_wonM/100		//(Ex-ante) winning probability of the match winner expressed as a proportion
	regress winProb_wonM_prop `envBase' if qualify==0, cluster(date)
	summarize `e(depvar)' if e(sample)


	*Table A.3 (based on tennis point outcomes, which are included in the sample here)
	merge 1:1 matchid using data_oncourt_tennisPointsByMatch, assert(1 3) nogen
	generate pts_match = ptsMatchwinner + ptsMatchLoser if matchComplete==1
	*Complete remaining variable of Table 1's descriptive statistics (see the beginning of program descriptives)
	summarize pts_match if city=="melbourne"&qualify==0
	summarize pts_match if city=="beijing"&qualify==0
	summarize pts_match if city~="melbourne"&city~="beijing"&qualify==0
	*Share of match points won by the favorite: who is the favorite and who won the match
	local p1fav "winProb_p1>winProb_p2"
	local p2fav "winProb_p1<winProb_p2"
	local p1won "score1>score2"
	local p2won "score1<score2"
	generate favoritePtsShare_match = ptsMatchwinner/pts_match if `p1fav' & `p1won' 	//Player 1 is favorite
	replace  favoritePtsShare_match = ptsMatchLoser/pts_match  if `p1fav' & `p2won'
	replace  favoritePtsShare_match = ptsMatchwinner/pts_match if `p2fav' & `p2won'	//Player 2 is favorite
	replace  favoritePtsShare_match = ptsMatchLoser/pts_match  if `p2fav' & `p1won'
	*A completed match requires that the winner win at least 48 points (2 sets, 6 games per set, 4 points per game, totaling 2*6*4 = 48)
	replace  favoritePtsShare_match = . if ptsMatchwinner<48
	*Columns 1 to 3
	forvalues spec = 1/3 {
		regress favoritePtsShare_match `rhs`spec'' if qualify==0, cluster(date)
		summarize `e(depvar)' if e(sample)
	}

	*Point outcomes by set: collapse to one record per match holding set-1 points and the set-1 winner
	use data_flashscore_tennisPointsBySet, clear
	rename tirbreaker tiebreaker
	tabulate games if set<=2 & tiebreaker==0
	summarize if set<=2 & tiebreaker==1
	keep if games<=13
	keep if set<=2
	*Set winner is read off the games tally in the last record (by order) of each match-set
	bysort matchid set (order): generate winner_set = 1 if p1games[_N]>p2games[_N]
	bysort matchid set (order): replace  winner_set = 2 if p1games[_N]<p2games[_N]
	bysort matchid set: egen p1_pts_set = total(p1pts)
	bysort matchid set: egen p2_pts_set = total(p2pts)
	keep matchid set winner_set p1_pts_set p2_pts_set
	bysort matchid set: keep if _n==1
	*Running match counter used as the reshape identifier
	bysort matchid: generate matchnum = (_n==1)
	replace matchnum = sum(matchnum)
	reshape wide p1_pts_set p2_pts_set winner_set, i(matchnum) j(set)
	*Set-1 point totals of zero for both players are set to missing
	replace p1_pts_set1=. if p1_pts_set1+p2_pts_set1==0
	replace p2_pts_set1=. if p1_pts_set1==.
	keep matchid winner_set1 p1_pts_set1 p2_pts_set1		//To replicate columns 4-6
	generate pts_set1 = p1_pts_set1 + p2_pts_set1
	merge 1:1 matchid using $tmp\PooledAnalysis, keep(2 3) nogen
	generate favoritePtsShare_set1 = p1_pts_set1/pts_set1 if `p1fav' & matchComplete==1
	replace  favoritePtsShare_set1 = p2_pts_set1/pts_set1 if `p2fav' & matchComplete==1
	*Columns 4 to 6
	forvalues spec = 1/3 {
		regress favoritePtsShare_set1 `rhs`spec'' if qualify==0, cluster(date)
		summarize `e(depvar)' if e(sample)
	}
end
	
reducedForm
	
	
	
capture program drop roofClosedHeatRain
program define roofClosedHeatRain
	*Reads the two roof-closure workbooks from the working directory and stores, for each,
	*a temporary file in $tmp holding matchid and the corresponding closure variable.

	*Roof closed due to heat: column closed is carried over as is under the name closedHeat
	import excel "data_press_roofClosedHeat.xlsx", sheet("Sheet1") cellrange(A1:H65) firstrow clear
	keep matchid closed
	rename closed closedHeat
	save $tmp\tmp_roofClosedHeat, replace

	*Roof closed due to rain: closedRain equals 1 when column closed reads "Yes" and 0 otherwise
	import excel "data_press_roofClosedRain.xlsx", sheet("Sheet1") cellrange(A1:J150) firstrow clear
	generate closedRain = (closed=="Yes")
	keep matchid closedRain
	save $tmp\tmp_roofClosedRain, replace
end



capture program drop dataMatlab
program define dataMatlab
	*Assembles the match-level variables listed in $matlabvar1 and $matlabvar2 and writes them,
	*without a header row, to $tmp\Estimation_data_all.txt for the Matlab routines.

	*Create the temporary roof-closure files merged below
	roofClosedHeatRain

	use $tmp\PooledAnalysis, clear

	*Attach each roof-closure variable, set it to 0 for matches absent from the file, and delete the temporary file
	foreach cause in Heat Rain {
		merge 1:1 matchid using $tmp\tmp_roofClosed`cause', assert(1 3) nogen
		replace closed`cause' = 0 if closed`cause'==.
		erase $tmp\tmp_roofClosed`cause'.dta
	}

	*Restrict to completed matches
	keep if matchComplete==1

	*Restrict to main-draw matches
	keep if qualify==0

	*Restrict to matches for which temperature and PM2.5 bins are observed
	keep if tp_bin~=.&pm25_bin~=.

	global matlabvar1 "w1 l1 w2 l2 w3 l3 wSets lSets tSets dvBet wWinProb lWinProb winProbDiff smallDiff dvRank wRank lRank dvPts wPts lPts wPtsN lPtsN"
	global matlabvar2 "dvAge wAge lAge ageDiff prizeCashK prizePts tp Cpm25 dvRain raining dvAussieO dvChinaO dvWuhanO dvOtherC roundM roofClosedHeat roofClosedRain cityID yearSeries"

	*Indicators for which player won the match, and for both players having a rank / ranking points
	local p1won "(score1>score2)"
	local p2won "(score1<score2)"
	local ranked "rank1~=.&rank2~=."
	local pointed "pts1~=.&pts2~=."

	*The following list of variables exactly follows the preceding macro and the commands that read variables in Matlab
	*1-6. games won in sets 1, 2 and 3 (set 3 if applicable) by match winner (w) and match loser (l)
	forvalues s = 1/3 {
		generate w`s' = set`s'game1*`p1won' + set`s'game2*`p2won'
		generate l`s' = set`s'game1*`p2won' + set`s'game2*`p1won'
	}
	generate wSets = score1*`p1won' + score2*`p2won'			//7. number of sets won by match winner
	generate lSets = score1*`p2won' + score2*`p1won'			//8. number of sets won by match loser
	generate tSets = totalSets									//9. total number of sets

	generate dvBet = (bet1~=.&bet2~=.)							//10. pre-match odds are available for both players
	generate wWinProb = winProb_p1*`p1won' + winProb_p2*`p2won'	//11. (ex-ante) winning probability of match winner (if pre-match odds are available)
	generate lWinProb = winProb_p1*`p2won' + winProb_p2*`p1won'	//12. (ex-ante) winning probability of match loser (if pre-match odds are available)
	generate winProbDiff = winProb_diff							//13. winProb_diff
	generate smallDiff = winProb_diffLess30						//14. indicator for winProb_diff<=30

	generate dvRank = (`ranked')										//15. WTA ranks are available for both players
	generate wRank = rank1*`p1won' + rank2*`p2won' if `ranked'			//16. WTA rank of match winner (if both players are ranked)
	generate lRank = rank1*`p2won' + rank2*`p1won' if `ranked'			//17. WTA rank of match loser (if both players are ranked)

	generate dvPts = (`pointed')													//18. WTA ranking points are available for both players
	generate wPts = pts1*`p1won' + pts2*`p2won' if `pointed'						//19. WTA ranking points of match winner (if both players have points)
	generate lPts = pts1*`p2won' + pts2*`p1won' if `pointed'						//20. WTA ranking points of match loser (if both players have points)
	generate wPtsN = pts_normalized1*`p1won' + pts_normalized2*`p2won' if `pointed'	//21. WTA z-score of match winner (if both players have points)
	generate lPtsN = pts_normalized1*`p2won' + pts_normalized2*`p1won' if `pointed'	//22. WTA z-score of match loser (if both players have points)

	generate dvAge = (age_p1~=.&age_p2~=.)						//23. age is available for both players
	generate wAge = age_p1*`p1won' + age_p2*`p2won'				//24. age of match winner (if age is available)
	generate lAge = age_p1*`p2won' + age_p2*`p1won'				//25. age of match loser (if age is available)
	generate ageDiff = age_diff									//26. age_diff

	generate prizeCashK = prizeCash/1000		//27. win-lose cash prize spread of match
	generate prizePts = .						//28. win-lose WTA ranking points spread of match (not used)

	generate tp = tp_t_f2						//29. temperature during match
	generate Cpm25 = pm25per100_t_f2			//30. pollution during match (China only; Australia always below threshold)
	replace Cpm25 = 0 if city=="melbourne"
	generate dvRain = (dv_rain~=.)				//31. record of rain is available for match
	generate raining = dv_rain					//32. dummy for any rain during match

	generate dvAussieO = (city=="melbourne")									//33. Australian Open match
	generate dvChinaO  = (city=="beijing")										//34. China Open match
	generate dvWuhanO  = (city=="wuhan")										//35. Wuhan Open match
	generate dvOtherC  = (city~="melbourne"&city~="beijing"&city~="wuhan")		//36. Other Chinese WTA series match (other than in Beijing and in Wuhan)

	generate roundM = round						//37. round of WTA series
	generate roofClosedHeat = closedHeat		//38. roofClosedHeat
	generate roofClosedRain = closedRain		//39. roofClosedRain
	*40. cityID and 41. yearSeries are exported under their existing names

	*Sort, move the export variables to the front, summarize them, and write the text file
	sort cityID round yearSeries
	order     $matlabvar1 $matlabvar2
	summarize $matlabvar1 $matlabvar2
	outsheet  $matlabvar1 $matlabvar2 using "$tmp\Estimation_data_all.txt", nonames replace
end

dataMatlab


	
